rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
11type WarningPosition = (usize, usize, String); // (line, column, found_name)
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
67/// Check if a trimmed line is an inline config comment from a linting tool.
68/// Recognized tools: rumdl, markdownlint, Vale, and remark-lint.
69fn is_inline_config_comment(trimmed: &str) -> bool {
70    trimmed.starts_with("<!-- rumdl-")
71        || trimmed.starts_with("<!-- markdownlint-")
72        || trimmed.starts_with("<!-- vale off")
73        || trimmed.starts_with("<!-- vale on")
74        || (trimmed.starts_with("<!-- vale ") && trimmed.contains(" = "))
75        || trimmed.starts_with("<!-- vale style")
76        || trimmed.starts_with("<!-- lint disable ")
77        || trimmed.starts_with("<!-- lint enable ")
78        || trimmed.starts_with("<!-- lint ignore ")
79}
80
81#[derive(Clone)]
82pub struct MD044ProperNames {
83    config: MD044Config,
84    // Cache the combined regex pattern string
85    combined_pattern: Option<String>,
86    // Precomputed lowercase name variants for fast pre-checks
87    name_variants: Vec<String>,
88    // Cache for name violations by content hash
89    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
90}
91
92impl MD044ProperNames {
93    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
94        let config = MD044Config {
95            names,
96            code_blocks,
97            html_elements: true, // Default to checking HTML elements
98            html_comments: true, // Default to checking HTML comments
99        };
100        let combined_pattern = Self::create_combined_pattern(&config);
101        let name_variants = Self::build_name_variants(&config);
102        Self {
103            config,
104            combined_pattern,
105            name_variants,
106            content_cache: Arc::new(Mutex::new(HashMap::new())),
107        }
108    }
109
/// Folds the supported lowercase accented Latin letters to their plain ASCII
/// base letter; every other character is passed through unchanged.
///
/// Only lowercase accented letters are mapped, so callers lowercase first.
fn ascii_normalize(s: &str) -> String {
    s.chars()
        .map(|ch| match ch {
            'é' | 'è' | 'ê' | 'ë' => 'e',
            'à' | 'á' | 'â' | 'ä' | 'ã' | 'å' => 'a',
            'ï' | 'î' | 'í' | 'ì' => 'i',
            'ü' | 'ú' | 'ù' | 'û' => 'u',
            'ö' | 'ó' | 'ò' | 'ô' | 'õ' => 'o',
            'ñ' => 'n',
            'ç' => 'c',
            other => other,
        })
        .collect()
}
120
121    pub fn from_config_struct(config: MD044Config) -> Self {
122        let combined_pattern = Self::create_combined_pattern(&config);
123        let name_variants = Self::build_name_variants(&config);
124        Self {
125            config,
126            combined_pattern,
127            name_variants,
128            content_cache: Arc::new(Mutex::new(HashMap::new())),
129        }
130    }
131
132    // Create a combined regex pattern for all proper names
133    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
134        if config.names.is_empty() {
135            return None;
136        }
137
138        // Create patterns for all names and their variations
139        let mut patterns: Vec<String> = config
140            .names
141            .iter()
142            .flat_map(|name| {
143                let mut variations = vec![];
144                let lower_name = name.to_lowercase();
145
146                // Add the lowercase version
147                variations.push(escape_regex(&lower_name));
148
149                // Add version without dots
150                let lower_name_no_dots = lower_name.replace('.', "");
151                if lower_name != lower_name_no_dots {
152                    variations.push(escape_regex(&lower_name_no_dots));
153                }
154
155                // Add ASCII-normalized versions for common accented characters
156                let ascii_normalized = Self::ascii_normalize(&lower_name);
157
158                if ascii_normalized != lower_name {
159                    variations.push(escape_regex(&ascii_normalized));
160
161                    // Also add version without dots
162                    let ascii_no_dots = ascii_normalized.replace('.', "");
163                    if ascii_normalized != ascii_no_dots {
164                        variations.push(escape_regex(&ascii_no_dots));
165                    }
166                }
167
168                variations
169            })
170            .collect();
171
172        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
173        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
174
175        // Combine all patterns into a single regex with capture groups
176        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
177        Some(format!(r"(?i)({})", patterns.join("|")))
178    }
179
180    fn build_name_variants(config: &MD044Config) -> Vec<String> {
181        let mut variants = HashSet::new();
182        for name in &config.names {
183            let lower_name = name.to_lowercase();
184            variants.insert(lower_name.clone());
185
186            let lower_no_dots = lower_name.replace('.', "");
187            if lower_name != lower_no_dots {
188                variants.insert(lower_no_dots);
189            }
190
191            let ascii_normalized = Self::ascii_normalize(&lower_name);
192            if ascii_normalized != lower_name {
193                variants.insert(ascii_normalized.clone());
194
195                let ascii_no_dots = ascii_normalized.replace('.', "");
196                if ascii_normalized != ascii_no_dots {
197                    variants.insert(ascii_no_dots);
198                }
199            }
200        }
201
202        variants.into_iter().collect()
203    }
204
205    // Find all name violations in the content and return positions.
206    // `content_lower` is the pre-computed lowercase version of `content` to avoid redundant allocations.
207    fn find_name_violations(
208        &self,
209        content: &str,
210        ctx: &crate::lint_context::LintContext,
211        content_lower: &str,
212    ) -> Vec<WarningPosition> {
213        // Early return: if no names configured or content is empty
214        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
215            return Vec::new();
216        }
217
218        // Early return: quick check if any of the configured names might be in content
219        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
220
221        if !has_potential_matches {
222            return Vec::new();
223        }
224
225        // Check if we have cached results
226        let hash = fast_hash(content);
227        {
228            // Use a separate scope for borrowing to minimize lock time
229            if let Ok(cache) = self.content_cache.lock()
230                && let Some(cached) = cache.get(&hash)
231            {
232                return cached.clone();
233            }
234        }
235
236        let mut violations = Vec::new();
237
238        // Get the regex from global cache
239        let combined_regex = match &self.combined_pattern {
240            Some(pattern) => match get_cached_regex(pattern) {
241                Ok(regex) => regex,
242                Err(_) => return Vec::new(),
243            },
244            None => return Vec::new(),
245        };
246
247        // Use ctx.lines for better performance
248        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
249            let line_num = line_idx + 1;
250            let line = line_info.content(ctx.content);
251
252            // Skip code fence lines (```language or ~~~language)
253            let trimmed = line.trim_start();
254            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
255                continue;
256            }
257
258            // Skip if in code block (when code_blocks = false)
259            if !self.config.code_blocks && line_info.in_code_block {
260                continue;
261            }
262
263            // Skip if in HTML block (when html_elements = false)
264            if !self.config.html_elements && line_info.in_html_block {
265                continue;
266            }
267
268            // Skip HTML comments using pre-computed line flag
269            if !self.config.html_comments && line_info.in_html_comment {
270                continue;
271            }
272
273            // Skip JSX expressions and MDX comments (MDX flavor)
274            if line_info.in_jsx_expression || line_info.in_mdx_comment {
275                continue;
276            }
277
278            // Skip Obsidian comments (Obsidian flavor)
279            if line_info.in_obsidian_comment {
280                continue;
281            }
282
283            // For frontmatter lines, determine offset where checkable value content starts.
284            // YAML keys should not be checked against proper names - only values.
285            let fm_value_offset = if line_info.in_front_matter {
286                Self::frontmatter_value_offset(line)
287            } else {
288                0
289            };
290            if fm_value_offset == usize::MAX {
291                continue;
292            }
293
294            // Skip inline config comments (rumdl, markdownlint, Vale, remark-lint directives)
295            if is_inline_config_comment(trimmed) {
296                continue;
297            }
298
299            // Early return: skip lines that don't contain any potential matches
300            let line_lower = line.to_lowercase();
301            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
302
303            if !has_line_matches {
304                continue;
305            }
306
307            // Use the combined regex to find all matches in one pass
308            for cap in combined_regex.find_iter(line) {
309                let found_name = &line[cap.start()..cap.end()];
310
311                // Check word boundaries manually for Unicode support
312                let start_pos = cap.start();
313                let end_pos = cap.end();
314
315                // Skip matches in the key portion of frontmatter lines
316                if start_pos < fm_value_offset {
317                    continue;
318                }
319
320                // Skip matches inside HTML tag attributes (handles multi-line tags)
321                let byte_pos = line_info.byte_offset + start_pos;
322                if ctx.is_in_html_tag(byte_pos) {
323                    continue;
324                }
325
326                if !Self::is_at_word_boundary(line, start_pos, true) || !Self::is_at_word_boundary(line, end_pos, false)
327                {
328                    continue; // Not at word boundary
329                }
330
331                // Skip if in inline code when code_blocks is false
332                if !self.config.code_blocks {
333                    if ctx.is_in_code_block_or_span(byte_pos) {
334                        continue;
335                    }
336                    // pulldown-cmark doesn't parse markdown syntax inside HTML
337                    // comments, HTML blocks, or frontmatter, so backtick-wrapped
338                    // text isn't detected by is_in_code_block_or_span. Check directly.
339                    if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
340                        && Self::is_in_backtick_code_in_line(line, start_pos)
341                    {
342                        continue;
343                    }
344                }
345
346                // Skip if in link URL or reference definition
347                if Self::is_in_link(ctx, byte_pos) {
348                    continue;
349                }
350
351                // Skip if inside an angle-bracket URL (e.g., <https://...>)
352                // The link parser skips autolinks inside HTML comments,
353                // so we detect them directly in the line text.
354                if Self::is_in_angle_bracket_url(line, start_pos) {
355                    continue;
356                }
357
358                // Find which proper name this matches
359                if let Some(proper_name) = self.get_proper_name_for(found_name) {
360                    // Only flag if it's not already correct
361                    if found_name != proper_name {
362                        violations.push((line_num, cap.start() + 1, found_name.to_string()));
363                    }
364                }
365            }
366        }
367
368        // Store in cache (ignore if mutex is poisoned)
369        if let Ok(mut cache) = self.content_cache.lock() {
370            cache.insert(hash, violations.clone());
371        }
372        violations
373    }
374
375    /// Check if a byte position is within a link URL (not link text)
376    ///
377    /// Link text should be checked for proper names, but URLs should be skipped.
378    /// For `[text](url)` - check text, skip url
379    /// For `[text][ref]` - check text, skip reference portion
380    /// For `[[text]]` (WikiLinks) - check text, skip brackets
381    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
382        use pulldown_cmark::LinkType;
383
384        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
385        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
386        if link_idx > 0 {
387            let link = &ctx.links[link_idx - 1];
388            if byte_pos < link.byte_end {
389                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
390                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
391                    link.byte_offset + 2
392                } else {
393                    link.byte_offset + 1
394                };
395                let text_end = text_start + link.text.len();
396
397                // If position is within the text portion, skip only if text is a URL
398                if byte_pos >= text_start && byte_pos < text_end {
399                    return Self::link_text_is_url(&link.text);
400                }
401                // Position is in the URL/reference portion, skip it
402                return true;
403            }
404        }
405
406        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
407        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
408        if image_idx > 0 {
409            let image = &ctx.images[image_idx - 1];
410            if byte_pos < image.byte_end {
411                // Image starts with '![' so alt text starts at byte_offset + 2
412                let alt_start = image.byte_offset + 2;
413                let alt_end = alt_start + image.alt_text.len();
414
415                // If position is within the alt text portion, don't skip
416                if byte_pos >= alt_start && byte_pos < alt_end {
417                    return false;
418                }
419                // Position is in the URL/reference portion, skip it
420                return true;
421            }
422        }
423
424        // Check pre-computed reference definitions
425        ctx.is_in_reference_def(byte_pos)
426    }
427
/// Reports whether link text is itself a URL, in which case it must not receive
/// proper-name corrections.
///
/// After trimming, the text counts as a URL when it starts with `http://`,
/// `https://`, or `www.` (ASCII case-insensitive), matching markdownlint.
fn link_text_is_url(text: &str) -> bool {
    let candidate = text.trim().as_bytes();
    ["http://", "https://", "www."].iter().any(|prefix| {
        candidate
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
434
/// Reports whether byte offset `pos` of `line` falls inside an angle-bracket
/// URL of the form `<scheme:...>` (both brackets included).
///
/// The link parser skips autolinks inside HTML comments, so they are detected
/// here from the raw line text; this also acts as a safety net for regular text.
///
/// A scheme is an ASCII letter followed by up to 31 characters from
/// `[a-zA-Z0-9+.-]`, then `:`. The URL body runs to the first `>` and must not
/// contain a space or `<`.
/// NOTE(review): CommonMark autolinks require at least two scheme characters;
/// a one-letter scheme is accepted here — confirm that is intended.
fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
    // For a `<` at `open`, returns the index of the matching `>` when the text
    // in between is a scheme-prefixed URL; `None` otherwise.
    fn closing_bracket(bytes: &[u8], open: usize) -> Option<usize> {
        let first = open + 1;
        if !bytes.get(first)?.is_ascii_alphabetic() {
            return None;
        }

        // Consume the rest of the scheme, capped at 32 characters in total.
        let limit = (first + 32).min(bytes.len());
        let scheme_end = (first + 1..limit)
            .find(|&k| !(bytes[k].is_ascii_alphanumeric() || matches!(bytes[k], b'+' | b'-' | b'.')))
            .unwrap_or(limit);
        if bytes.get(scheme_end) != Some(&b':') {
            return None;
        }

        // The first of `>`, space, or `<` decides: only `>` closes the URL.
        let body_start = scheme_end + 1;
        let stop = body_start
            + bytes[body_start..]
                .iter()
                .position(|b| matches!(b, b'>' | b' ' | b'<'))?;
        if bytes[stop] == b'>' { Some(stop) } else { None }
    }

    let bytes = line.as_bytes();
    let mut cursor = 0;
    while cursor < bytes.len() {
        if bytes[cursor] == b'<' {
            if let Some(close) = closing_bracket(bytes, cursor) {
                if (cursor..=close).contains(&pos) {
                    return true;
                }
                // Resume after this URL.
                cursor = close + 1;
                continue;
            }
        }
        cursor += 1;
    }
    false
}
489
/// Check if a position within a line falls inside backtick-delimited code.
///
/// pulldown-cmark does not parse markdown syntax inside HTML comments, so
/// `ctx.is_in_code_block_or_span` returns false for backtick-wrapped text
/// within comments. This function detects backtick code spans directly in
/// the line text following CommonMark rules: a code span starts with a run of
/// N backticks and ends at the next run of exactly N backticks.
///
/// An opening run with no matching closer is literal text, so scanning resumes
/// right after it and later spans on the line are still recognised
/// (for example the `x` span in ``"`` a `x` b"``).
///
/// `pos` is a byte offset into `line`. Returns `true` only when `pos` lies in
/// the span's content, i.e. strictly between the delimiter runs.
fn is_in_backtick_code_in_line(line: &str, pos: usize) -> bool {
    let bytes = line.as_bytes();
    let len = bytes.len();

    // End (exclusive) of the backtick run that starts at `start`.
    let run_end = |start: usize| start + bytes[start..].iter().take_while(|&&b| b == b'`').count();

    let mut i = 0;
    while i < len {
        if bytes[i] != b'`' {
            i += 1;
            continue;
        }

        let open_end = run_end(i);
        let tick_len = open_end - i;

        // Look for the first later run with exactly the same length.
        let mut closer = None;
        let mut j = open_end;
        while j < len {
            if bytes[j] == b'`' {
                let close_end = run_end(j);
                if close_end - j == tick_len {
                    closer = Some((j, close_end));
                    break;
                }
                // Wrong length: skip this whole run and keep looking.
                j = close_end;
            } else {
                j += 1;
            }
        }

        match closer {
            Some((close_start, close_end)) => {
                if pos >= open_end && pos < close_start {
                    return true;
                }
                // Continue after the closed span.
                i = close_end;
            }
            // Unmatched opener: literal backticks. Resume just after the run
            // instead of giving up on the rest of the line.
            None => i = open_end,
        }
    }
    false
}
540
/// Reports whether `c` separates words: any character that is not
/// alphanumeric in the Unicode sense (so `_`, `-`, and whitespace all count).
fn is_word_boundary_char(c: char) -> bool {
    let part_of_word = c.is_alphanumeric();
    !part_of_word
}
545
546    // Check if position is at a word boundary using byte-level lookups.
547    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
548        if is_start {
549            if pos == 0 {
550                return true;
551            }
552            match content[..pos].chars().next_back() {
553                None => true,
554                Some(c) => Self::is_word_boundary_char(c),
555            }
556        } else {
557            if pos >= content.len() {
558                return true;
559            }
560            match content[pos..].chars().next() {
561                None => true,
562                Some(c) => Self::is_word_boundary_char(c),
563            }
564        }
565    }
566
567    /// For a frontmatter line, return the byte offset where the checkable
568    /// value portion starts. Returns `usize::MAX` if the entire line should be
569    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
570    fn frontmatter_value_offset(line: &str) -> usize {
571        let trimmed = line.trim();
572
573        // Skip frontmatter delimiters and empty lines
574        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
575            return usize::MAX;
576        }
577
578        // Skip YAML comments
579        if trimmed.starts_with('#') {
580            return usize::MAX;
581        }
582
583        // YAML list item: "  - item" or "  - key: value"
584        let stripped = line.trim_start();
585        if let Some(after_dash) = stripped.strip_prefix("- ") {
586            let leading = line.len() - stripped.len();
587            // Check if the list item contains a mapping (e.g., "- key: value")
588            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
589                return result;
590            }
591            // Bare list item value (no colon) - check content after "- "
592            return leading + 2;
593        }
594        if stripped == "-" {
595            return usize::MAX;
596        }
597
598        // Key-value pair with colon separator (YAML): "key: value"
599        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
600            return result;
601        }
602
603        // Key-value pair with equals separator (TOML): "key = value"
604        if let Some(eq_pos) = line.find('=') {
605            let after_eq = eq_pos + 1;
606            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
607                let value_start = after_eq + 1;
608                let value_slice = &line[value_start..];
609                let value_trimmed = value_slice.trim();
610                if value_trimmed.is_empty() {
611                    return usize::MAX;
612                }
613                // For quoted values, skip the opening quote character
614                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
615                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
616                {
617                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
618                    return value_start + quote_offset + 1;
619                }
620                return value_start;
621            }
622            // Equals with no space after or at end of line -> no value to check
623            return usize::MAX;
624        }
625
626        // No separator found - continuation line or bare value, check the whole line
627        0
628    }
629
/// Locates the value of a colon-separated `key: value` pair.
///
/// `content` is the slice of `line` that begins at byte `base_offset`; the
/// first `:` in it is taken as the separator. Returns:
/// - `None` when `content` has no colon;
/// - `Some(usize::MAX)` when there is nothing to check (no space after the
///   colon, an empty value, or a flow mapping/sequence starting with `{` / `[`);
/// - `Some(offset)` with the byte offset in `line` where the value starts,
///   placed just past the opening quote for a quoted value.
fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
    let after_colon = base_offset + content.find(':')? + 1;

    // The separator must be ": " — a bare colon carries no checkable value.
    if line.as_bytes().get(after_colon) != Some(&b' ') {
        return Some(usize::MAX);
    }

    let value_start = after_colon + 1;
    let raw_value = &line[value_start..];
    let value = raw_value.trim();
    // Only whitespace precedes the trimmed value, so this is where it begins.
    let leading_ws = raw_value.len() - raw_value.trim_start().len();
    let wrapped_in = |quote: char| value.starts_with(quote) && value.ends_with(quote);

    let offset = match value.chars().next() {
        // Empty value.
        None => usize::MAX,
        // Flow mappings and sequences are too complex for heuristic parsing.
        Some('{') | Some('[') => usize::MAX,
        // Quoted value: step over the opening quote.
        Some(_) if wrapped_in('"') || wrapped_in('\'') => value_start + leading_ws + 1,
        Some(_) => value_start,
    };
    Some(offset)
}
660
661    // Get the proper name that should be used for a found name
662    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
663        let found_lower = found_name.to_lowercase();
664
665        // Iterate through the configured proper names
666        for name in &self.config.names {
667            let lower_name = name.to_lowercase();
668            let lower_name_no_dots = lower_name.replace('.', "");
669
670            // Direct match
671            if found_lower == lower_name || found_lower == lower_name_no_dots {
672                return Some(name.clone());
673            }
674
675            // Check ASCII-normalized version
676            let ascii_normalized = Self::ascii_normalize(&lower_name);
677
678            let ascii_no_dots = ascii_normalized.replace('.', "");
679
680            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
681                return Some(name.clone());
682            }
683        }
684        None
685    }
686}
687
688impl Rule for MD044ProperNames {
689    fn name(&self) -> &'static str {
690        "MD044"
691    }
692
693    fn description(&self) -> &'static str {
694        "Proper names should have the correct capitalization"
695    }
696
697    fn category(&self) -> RuleCategory {
698        RuleCategory::Other
699    }
700
701    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
702        if self.config.names.is_empty() {
703            return true;
704        }
705        // Quick check if any configured name variants exist (case-insensitive)
706        let content_lower = if ctx.content.is_ascii() {
707            ctx.content.to_ascii_lowercase()
708        } else {
709            ctx.content.to_lowercase()
710        };
711        !self.name_variants.iter().any(|name| content_lower.contains(name))
712    }
713
714    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
715        let content = ctx.content;
716        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
717            return Ok(Vec::new());
718        }
719
720        // Compute lowercase content once and reuse across all checks
721        let content_lower = if content.is_ascii() {
722            content.to_ascii_lowercase()
723        } else {
724            content.to_lowercase()
725        };
726
727        // Early return: use pre-computed name_variants for the quick check
728        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
729
730        if !has_potential_matches {
731            return Ok(Vec::new());
732        }
733
734        let line_index = &ctx.line_index;
735        let violations = self.find_name_violations(content, ctx, &content_lower);
736
737        let warnings = violations
738            .into_iter()
739            .filter_map(|(line, column, found_name)| {
740                self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
741                    rule_name: Some(self.name().to_string()),
742                    line,
743                    column,
744                    end_line: line,
745                    end_column: column + found_name.len(),
746                    message: format!("Proper name '{found_name}' should be '{proper_name}'"),
747                    severity: Severity::Warning,
748                    fix: Some(Fix {
749                        range: line_index.line_col_to_byte_range_with_length(line, column, found_name.len()),
750                        replacement: proper_name,
751                    }),
752                })
753            })
754            .collect();
755
756        Ok(warnings)
757    }
758
759    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
760        let content = ctx.content;
761        if content.is_empty() || self.config.names.is_empty() {
762            return Ok(content.to_string());
763        }
764
765        let content_lower = if content.is_ascii() {
766            content.to_ascii_lowercase()
767        } else {
768            content.to_lowercase()
769        };
770        let violations = self.find_name_violations(content, ctx, &content_lower);
771        if violations.is_empty() {
772            return Ok(content.to_string());
773        }
774
775        // Process lines and build the fixed content
776        let mut fixed_lines = Vec::new();
777
778        // Group violations by line
779        let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
780        for (line_num, col_num, found_name) in violations {
781            violations_by_line
782                .entry(line_num)
783                .or_default()
784                .push((col_num, found_name));
785        }
786
787        // Sort violations within each line in reverse order
788        for violations in violations_by_line.values_mut() {
789            violations.sort_by_key(|b| std::cmp::Reverse(b.0));
790        }
791
792        // Process each line
793        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
794            let line_num = line_idx + 1;
795
796            // Skip lines where this rule is disabled by inline config
797            if ctx.inline_config().is_rule_disabled(self.name(), line_num) {
798                fixed_lines.push(line_info.content(ctx.content).to_string());
799                continue;
800            }
801
802            if let Some(line_violations) = violations_by_line.get(&line_num) {
803                // This line has violations, fix them
804                let mut fixed_line = line_info.content(ctx.content).to_string();
805
806                for (col_num, found_name) in line_violations {
807                    if let Some(proper_name) = self.get_proper_name_for(found_name) {
808                        let start_col = col_num - 1; // Convert to 0-based
809                        let end_col = start_col + found_name.len();
810
811                        if end_col <= fixed_line.len()
812                            && fixed_line.is_char_boundary(start_col)
813                            && fixed_line.is_char_boundary(end_col)
814                        {
815                            fixed_line.replace_range(start_col..end_col, &proper_name);
816                        }
817                    }
818                }
819
820                fixed_lines.push(fixed_line);
821            } else {
822                // No violations on this line, keep it as is
823                fixed_lines.push(line_info.content(ctx.content).to_string());
824            }
825        }
826
827        // Join lines with newlines, preserving the original ending
828        let mut result = fixed_lines.join("\n");
829        if content.ends_with('\n') && !result.ends_with('\n') {
830            result.push('\n');
831        }
832        Ok(result)
833    }
834
835    fn as_any(&self) -> &dyn std::any::Any {
836        self
837    }
838
839    fn default_config_section(&self) -> Option<(String, toml::Value)> {
840        let json_value = serde_json::to_value(&self.config).ok()?;
841        Some((
842            self.name().to_string(),
843            crate::rule_config_serde::json_to_toml_value(&json_value)?,
844        ))
845    }
846
847    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
848    where
849        Self: Sized,
850    {
851        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
852        Box::new(Self::from_config_struct(rule_config))
853    }
854}
855
856#[cfg(test)]
857mod tests {
858    use super::*;
859    use crate::lint_context::LintContext;
860
861    fn create_context(content: &str) -> LintContext<'_> {
862        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
863    }
864
865    #[test]
866    fn test_correctly_capitalized_names() {
867        let rule = MD044ProperNames::new(
868            vec![
869                "JavaScript".to_string(),
870                "TypeScript".to_string(),
871                "Node.js".to_string(),
872            ],
873            true,
874        );
875
876        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
877        let ctx = create_context(content);
878        let result = rule.check(&ctx).unwrap();
879        assert!(result.is_empty(), "Should not flag correctly capitalized names");
880    }
881
882    #[test]
883    fn test_incorrectly_capitalized_names() {
884        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
885
886        let content = "This document uses javascript and typescript incorrectly.";
887        let ctx = create_context(content);
888        let result = rule.check(&ctx).unwrap();
889
890        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
891        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
892        assert_eq!(result[0].line, 1);
893        assert_eq!(result[0].column, 20);
894        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
895        assert_eq!(result[1].line, 1);
896        assert_eq!(result[1].column, 35);
897    }
898
899    #[test]
900    fn test_names_at_beginning_of_sentences() {
901        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
902
903        let content = "javascript is a great language. python is also popular.";
904        let ctx = create_context(content);
905        let result = rule.check(&ctx).unwrap();
906
907        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
908        assert_eq!(result[0].line, 1);
909        assert_eq!(result[0].column, 1);
910        assert_eq!(result[1].line, 1);
911        assert_eq!(result[1].column, 33);
912    }
913
914    #[test]
915    fn test_names_in_code_blocks_checked_by_default() {
916        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
917
918        let content = r#"Here is some text with JavaScript.
919
920```javascript
921// This javascript should be checked
922const lang = "javascript";
923```
924
925But this javascript should be flagged."#;
926
927        let ctx = create_context(content);
928        let result = rule.check(&ctx).unwrap();
929
930        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
931        assert_eq!(result[0].line, 4);
932        assert_eq!(result[1].line, 5);
933        assert_eq!(result[2].line, 8);
934    }
935
936    #[test]
937    fn test_names_in_code_blocks_ignored_when_disabled() {
938        let rule = MD044ProperNames::new(
939            vec!["JavaScript".to_string()],
940            false, // code_blocks = false means skip code blocks
941        );
942
943        let content = r#"```
944javascript in code block
945```"#;
946
947        let ctx = create_context(content);
948        let result = rule.check(&ctx).unwrap();
949
950        assert_eq!(
951            result.len(),
952            0,
953            "Should not flag javascript in code blocks when code_blocks is false"
954        );
955    }
956
957    #[test]
958    fn test_names_in_inline_code_checked_by_default() {
959        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
960
961        let content = "This is `javascript` in inline code and javascript outside.";
962        let ctx = create_context(content);
963        let result = rule.check(&ctx).unwrap();
964
965        // When code_blocks=true, inline code should be checked
966        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
967        assert_eq!(result[0].column, 10); // javascript in inline code
968        assert_eq!(result[1].column, 41); // javascript outside
969    }
970
971    #[test]
972    fn test_multiple_names_in_same_line() {
973        let rule = MD044ProperNames::new(
974            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
975            true,
976        );
977
978        let content = "I use javascript, typescript, and react in my projects.";
979        let ctx = create_context(content);
980        let result = rule.check(&ctx).unwrap();
981
982        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
983        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
984        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
985        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
986    }
987
988    #[test]
989    fn test_case_sensitivity() {
990        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
991
992        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
993        let ctx = create_context(content);
994        let result = rule.check(&ctx).unwrap();
995
996        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
997        // JavaScript (correct) should not be flagged
998        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
999    }
1000
1001    #[test]
1002    fn test_configuration_with_custom_name_list() {
1003        let config = MD044Config {
1004            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
1005            code_blocks: true,
1006            html_elements: true,
1007            html_comments: true,
1008        };
1009        let rule = MD044ProperNames::from_config_struct(config);
1010
1011        let content = "We use github, gitlab, and devops for our workflow.";
1012        let ctx = create_context(content);
1013        let result = rule.check(&ctx).unwrap();
1014
1015        assert_eq!(result.len(), 3, "Should flag all custom names");
1016        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
1017        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
1018        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
1019    }
1020
1021    #[test]
1022    fn test_empty_configuration() {
1023        let rule = MD044ProperNames::new(vec![], true);
1024
1025        let content = "This has javascript and typescript but no configured names.";
1026        let ctx = create_context(content);
1027        let result = rule.check(&ctx).unwrap();
1028
1029        assert!(result.is_empty(), "Should not flag anything with empty configuration");
1030    }
1031
1032    #[test]
1033    fn test_names_with_special_characters() {
1034        let rule = MD044ProperNames::new(
1035            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
1036            true,
1037        );
1038
1039        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
1040        let ctx = create_context(content);
1041        let result = rule.check(&ctx).unwrap();
1042
1043        // nodejs should match Node.js (dotless variation)
1044        // asp.net should be flagged (wrong case)
1045        // ASP.NET should not be flagged (correct)
1046        // c++ should be flagged
1047        assert_eq!(result.len(), 3, "Should handle special characters correctly");
1048
1049        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
1050        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
1051        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
1052        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
1053    }
1054
1055    #[test]
1056    fn test_word_boundaries() {
1057        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
1058
1059        let content = "JavaScript is not java or script, but Java and Script are separate.";
1060        let ctx = create_context(content);
1061        let result = rule.check(&ctx).unwrap();
1062
1063        // Should only flag lowercase "java" and "script" as separate words
1064        assert_eq!(result.len(), 2, "Should respect word boundaries");
1065        assert!(result.iter().any(|w| w.column == 19)); // "java" position
1066        assert!(result.iter().any(|w| w.column == 27)); // "script" position
1067    }
1068
1069    #[test]
1070    fn test_fix_method() {
1071        let rule = MD044ProperNames::new(
1072            vec![
1073                "JavaScript".to_string(),
1074                "TypeScript".to_string(),
1075                "Node.js".to_string(),
1076            ],
1077            true,
1078        );
1079
1080        let content = "I love javascript, typescript, and nodejs!";
1081        let ctx = create_context(content);
1082        let fixed = rule.fix(&ctx).unwrap();
1083
1084        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
1085    }
1086
1087    #[test]
1088    fn test_fix_multiple_occurrences() {
1089        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
1090
1091        let content = "python is great. I use python daily. PYTHON is powerful.";
1092        let ctx = create_context(content);
1093        let fixed = rule.fix(&ctx).unwrap();
1094
1095        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
1096    }
1097
1098    #[test]
1099    fn test_fix_checks_code_blocks_by_default() {
1100        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1101
1102        let content = r#"I love javascript.
1103
1104```
1105const lang = "javascript";
1106```
1107
1108More javascript here."#;
1109
1110        let ctx = create_context(content);
1111        let fixed = rule.fix(&ctx).unwrap();
1112
1113        let expected = r#"I love JavaScript.
1114
1115```
1116const lang = "JavaScript";
1117```
1118
1119More JavaScript here."#;
1120
1121        assert_eq!(fixed, expected);
1122    }
1123
1124    #[test]
1125    fn test_multiline_content() {
1126        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
1127
1128        let content = r#"First line with rust.
1129Second line with python.
1130Third line with RUST and PYTHON."#;
1131
1132        let ctx = create_context(content);
1133        let result = rule.check(&ctx).unwrap();
1134
1135        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
1136        assert_eq!(result[0].line, 1);
1137        assert_eq!(result[1].line, 2);
1138        assert_eq!(result[2].line, 3);
1139        assert_eq!(result[3].line, 3);
1140    }
1141
1142    #[test]
1143    fn test_default_config() {
1144        let config = MD044Config::default();
1145        assert!(config.names.is_empty());
1146        assert!(!config.code_blocks);
1147        assert!(config.html_elements);
1148        assert!(config.html_comments);
1149    }
1150
1151    #[test]
1152    fn test_default_config_checks_html_comments() {
1153        let config = MD044Config {
1154            names: vec!["JavaScript".to_string()],
1155            ..MD044Config::default()
1156        };
1157        let rule = MD044ProperNames::from_config_struct(config);
1158
1159        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1160        let ctx = create_context(content);
1161        let result = rule.check(&ctx).unwrap();
1162
1163        assert_eq!(result.len(), 1, "Default config should check HTML comments");
1164        assert_eq!(result[0].line, 3);
1165    }
1166
1167    #[test]
1168    fn test_default_config_skips_code_blocks() {
1169        let config = MD044Config {
1170            names: vec!["JavaScript".to_string()],
1171            ..MD044Config::default()
1172        };
1173        let rule = MD044ProperNames::from_config_struct(config);
1174
1175        let content = "# Guide\n\n```\njavascript in code\n```\n";
1176        let ctx = create_context(content);
1177        let result = rule.check(&ctx).unwrap();
1178
1179        assert_eq!(result.len(), 0, "Default config should skip code blocks");
1180    }
1181
1182    #[test]
1183    fn test_standalone_html_comment_checked() {
1184        let config = MD044Config {
1185            names: vec!["Test".to_string()],
1186            ..MD044Config::default()
1187        };
1188        let rule = MD044ProperNames::from_config_struct(config);
1189
1190        let content = "# Heading\n\n<!-- this is a test example -->\n";
1191        let ctx = create_context(content);
1192        let result = rule.check(&ctx).unwrap();
1193
1194        assert_eq!(result.len(), 1, "Should flag proper name in standalone HTML comment");
1195        assert_eq!(result[0].line, 3);
1196    }
1197
1198    #[test]
1199    fn test_inline_config_comments_not_flagged() {
1200        let config = MD044Config {
1201            names: vec!["RUMDL".to_string()],
1202            ..MD044Config::default()
1203        };
1204        let rule = MD044ProperNames::from_config_struct(config);
1205
1206        // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
1207        // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
1208        // but would be suppressed by the linting engine's inline config filtering.
1209        let content = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
1210        let ctx = create_context(content);
1211        let result = rule.check(&ctx).unwrap();
1212
1213        assert_eq!(result.len(), 2, "Should only flag body lines, not config comments");
1214        assert_eq!(result[0].line, 2);
1215        assert_eq!(result[1].line, 5);
1216    }
1217
1218    #[test]
1219    fn test_html_comment_skipped_when_disabled() {
1220        let config = MD044Config {
1221            names: vec!["Test".to_string()],
1222            code_blocks: true,
1223            html_elements: true,
1224            html_comments: false,
1225        };
1226        let rule = MD044ProperNames::from_config_struct(config);
1227
1228        let content = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
1229        let ctx = create_context(content);
1230        let result = rule.check(&ctx).unwrap();
1231
1232        assert_eq!(
1233            result.len(),
1234            1,
1235            "Should only flag 'test' outside HTML comment when html_comments=false"
1236        );
1237        assert_eq!(result[0].line, 5);
1238    }
1239
1240    #[test]
1241    fn test_fix_corrects_html_comment_content() {
1242        let config = MD044Config {
1243            names: vec!["JavaScript".to_string()],
1244            ..MD044Config::default()
1245        };
1246        let rule = MD044ProperNames::from_config_struct(config);
1247
1248        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1249        let ctx = create_context(content);
1250        let fixed = rule.fix(&ctx).unwrap();
1251
1252        assert_eq!(fixed, "# Guide\n\n<!-- JavaScript mentioned here -->\n");
1253    }
1254
1255    #[test]
1256    fn test_fix_does_not_modify_inline_config_comments() {
1257        let config = MD044Config {
1258            names: vec!["RUMDL".to_string()],
1259            ..MD044Config::default()
1260        };
1261        let rule = MD044ProperNames::from_config_struct(config);
1262
1263        let content = "<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n";
1264        let ctx = create_context(content);
1265        let fixed = rule.fix(&ctx).unwrap();
1266
1267        // Config comments should be untouched
1268        assert!(fixed.contains("<!-- rumdl-disable -->"));
1269        assert!(fixed.contains("<!-- rumdl-enable -->"));
1270        // Body text inside disable block should NOT be fixed (rule is disabled)
1271        assert!(
1272            fixed.contains("Some rumdl text."),
1273            "Line inside rumdl-disable block should not be modified by fix()"
1274        );
1275    }
1276
1277    #[test]
1278    fn test_fix_respects_inline_disable_partial() {
1279        let config = MD044Config {
1280            names: vec!["RUMDL".to_string()],
1281            ..MD044Config::default()
1282        };
1283        let rule = MD044ProperNames::from_config_struct(config);
1284
1285        let content =
1286            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
1287        let ctx = create_context(content);
1288        let fixed = rule.fix(&ctx).unwrap();
1289
1290        // Line inside disable block should be preserved
1291        assert!(
1292            fixed.contains("Some rumdl text.\n<!-- rumdl-enable"),
1293            "Line inside disable block should not be modified"
1294        );
1295        // Line outside disable block should be fixed
1296        assert!(
1297            fixed.contains("Some RUMDL text outside."),
1298            "Line outside disable block should be fixed"
1299        );
1300    }
1301
1302    #[test]
1303    fn test_performance_with_many_names() {
1304        let mut names = vec![];
1305        for i in 0..50 {
1306            names.push(format!("ProperName{i}"));
1307        }
1308
1309        let rule = MD044ProperNames::new(names, true);
1310
1311        let content = "This has propername0, propername25, and propername49 incorrectly.";
1312        let ctx = create_context(content);
1313        let result = rule.check(&ctx).unwrap();
1314
1315        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
1316    }
1317
1318    #[test]
1319    fn test_large_name_count_performance() {
1320        // Verify MD044 can handle large numbers of names without regex limitations
1321        // This test confirms that fancy-regex handles large patterns well
1322        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
1323
1324        let rule = MD044ProperNames::new(names, true);
1325
1326        // The combined pattern should be created successfully
1327        assert!(rule.combined_pattern.is_some());
1328
1329        // Should be able to check content without errors
1330        let content = "This has propername0 and propername999 in it.";
1331        let ctx = create_context(content);
1332        let result = rule.check(&ctx).unwrap();
1333
1334        // Should detect both incorrect names
1335        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
1336    }
1337
1338    #[test]
1339    fn test_cache_behavior() {
1340        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1341
1342        let content = "Using javascript here.";
1343        let ctx = create_context(content);
1344
1345        // First check
1346        let result1 = rule.check(&ctx).unwrap();
1347        assert_eq!(result1.len(), 1);
1348
1349        // Second check should use cache
1350        let result2 = rule.check(&ctx).unwrap();
1351        assert_eq!(result2.len(), 1);
1352
1353        // Results should be identical
1354        assert_eq!(result1[0].line, result2[0].line);
1355        assert_eq!(result1[0].column, result2[0].column);
1356    }
1357
1358    #[test]
1359    fn test_html_comments_not_checked_when_disabled() {
1360        let config = MD044Config {
1361            names: vec!["JavaScript".to_string()],
1362            code_blocks: true,    // Check code blocks
1363            html_elements: true,  // Check HTML elements
1364            html_comments: false, // Don't check HTML comments
1365        };
1366        let rule = MD044ProperNames::from_config_struct(config);
1367
1368        let content = r#"Regular javascript here.
1369<!-- This javascript in HTML comment should be ignored -->
1370More javascript outside."#;
1371
1372        let ctx = create_context(content);
1373        let result = rule.check(&ctx).unwrap();
1374
1375        assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
1376        assert_eq!(result[0].line, 1);
1377        assert_eq!(result[1].line, 3);
1378    }
1379
1380    #[test]
1381    fn test_html_comments_checked_when_enabled() {
1382        let config = MD044Config {
1383            names: vec!["JavaScript".to_string()],
1384            code_blocks: true,   // Check code blocks
1385            html_elements: true, // Check HTML elements
1386            html_comments: true, // Check HTML comments
1387        };
1388        let rule = MD044ProperNames::from_config_struct(config);
1389
1390        let content = r#"Regular javascript here.
1391<!-- This javascript in HTML comment should be checked -->
1392More javascript outside."#;
1393
1394        let ctx = create_context(content);
1395        let result = rule.check(&ctx).unwrap();
1396
1397        assert_eq!(
1398            result.len(),
1399            3,
1400            "Should flag all javascript occurrences including in HTML comments"
1401        );
1402    }
1403
1404    #[test]
1405    fn test_multiline_html_comments() {
1406        let config = MD044Config {
1407            names: vec!["Python".to_string(), "JavaScript".to_string()],
1408            code_blocks: true,    // Check code blocks
1409            html_elements: true,  // Check HTML elements
1410            html_comments: false, // Don't check HTML comments
1411        };
1412        let rule = MD044ProperNames::from_config_struct(config);
1413
1414        let content = r#"Regular python here.
1415<!--
1416This is a multiline comment
1417with javascript and python
1418that should be ignored
1419-->
1420More javascript outside."#;
1421
1422        let ctx = create_context(content);
1423        let result = rule.check(&ctx).unwrap();
1424
1425        assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1426        assert_eq!(result[0].line, 1); // python
1427        assert_eq!(result[1].line, 7); // javascript
1428    }
1429
1430    #[test]
1431    fn test_fix_preserves_html_comments_when_disabled() {
1432        let config = MD044Config {
1433            names: vec!["JavaScript".to_string()],
1434            code_blocks: true,    // Check code blocks
1435            html_elements: true,  // Check HTML elements
1436            html_comments: false, // Don't check HTML comments
1437        };
1438        let rule = MD044ProperNames::from_config_struct(config);
1439
1440        let content = r#"javascript here.
1441<!-- javascript in comment -->
1442More javascript."#;
1443
1444        let ctx = create_context(content);
1445        let fixed = rule.fix(&ctx).unwrap();
1446
1447        let expected = r#"JavaScript here.
1448<!-- javascript in comment -->
1449More JavaScript."#;
1450
1451        assert_eq!(
1452            fixed, expected,
1453            "Should not fix names inside HTML comments when disabled"
1454        );
1455    }
1456
1457    #[test]
1458    fn test_proper_names_in_link_text_are_flagged() {
1459        let rule = MD044ProperNames::new(
1460            vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1461            true,
1462        );
1463
1464        let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1465
1466Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1467
1468Real javascript should be flagged.
1469
1470Also see the [typescript guide][ts-ref] for more.
1471
1472Real python should be flagged too.
1473
1474[ts-ref]: https://typescript.org/handbook"#;
1475
1476        let ctx = create_context(content);
1477        let result = rule.check(&ctx).unwrap();
1478
1479        // Link text should be checked, URLs should not be checked
1480        // Line 1: [javascript documentation] - "javascript" should be flagged
1481        // Line 3: [node.js homepage] - "node.js" should be flagged (matches "Node.js")
1482        // Line 3: [python tutorial] - "python" should be flagged
1483        // Line 5: standalone javascript
1484        // Line 9: standalone python
1485        assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1486
1487        // Verify line numbers for link text warnings
1488        let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1489        assert_eq!(line_1_warnings.len(), 1);
1490        assert!(
1491            line_1_warnings[0]
1492                .message
1493                .contains("'javascript' should be 'JavaScript'")
1494        );
1495
1496        let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1497        assert_eq!(line_3_warnings.len(), 2); // node.js and python
1498
1499        // Standalone warnings
1500        assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1501        assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1502    }
1503
1504    #[test]
1505    fn test_link_urls_not_flagged() {
1506        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1507
1508        // URL contains "javascript" but should NOT be flagged
1509        let content = r#"[Link Text](https://javascript.info/guide)"#;
1510
1511        let ctx = create_context(content);
1512        let result = rule.check(&ctx).unwrap();
1513
1514        // URL should not be checked
1515        assert!(result.is_empty(), "URLs should not be checked for proper names");
1516    }
1517
1518    #[test]
1519    fn test_proper_names_in_image_alt_text_are_flagged() {
1520        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1521
1522        let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.
1523
1524Real javascript should be flagged."#;
1525
1526        let ctx = create_context(content);
1527        let result = rule.check(&ctx).unwrap();
1528
1529        // Image alt text should be checked, URL and title should not be checked
1530        // Line 1: ![javascript logo] - "javascript" should be flagged
1531        // Line 3: standalone javascript
1532        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1533        assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1534        assert!(result[0].line == 1); // "![javascript logo]"
1535        assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1536        assert!(result[1].line == 3); // "Real javascript should be flagged."
1537    }
1538
1539    #[test]
1540    fn test_image_urls_not_flagged() {
1541        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1542
1543        // URL contains "javascript" but should NOT be flagged
1544        let content = r#"![Logo](https://javascript.info/logo.png)"#;
1545
1546        let ctx = create_context(content);
1547        let result = rule.check(&ctx).unwrap();
1548
1549        // Image URL should not be checked
1550        assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1551    }
1552
1553    #[test]
1554    fn test_reference_link_text_flagged_but_definition_not() {
1555        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1556
1557        let content = r#"Check the [javascript guide][js-ref] for details.
1558
1559Real javascript should be flagged.
1560
1561[js-ref]: https://javascript.info/typescript/guide"#;
1562
1563        let ctx = create_context(content);
1564        let result = rule.check(&ctx).unwrap();
1565
1566        // Link text should be checked, reference definitions should not
1567        // Line 1: [javascript guide] - should be flagged
1568        // Line 3: standalone javascript - should be flagged
1569        // Line 5: reference definition - should NOT be flagged
1570        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1571        assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1572        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1573    }
1574
1575    #[test]
1576    fn test_reference_definitions_not_flagged() {
1577        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1578
1579        // Reference definition should NOT be flagged
1580        let content = r#"[js-ref]: https://javascript.info/guide"#;
1581
1582        let ctx = create_context(content);
1583        let result = rule.check(&ctx).unwrap();
1584
1585        // Reference definition URLs should not be checked
1586        assert!(result.is_empty(), "Reference definitions should not be checked");
1587    }
1588
1589    #[test]
1590    fn test_wikilinks_text_is_flagged() {
1591        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1592
1593        // WikiLinks [[destination]] should have their text checked
1594        let content = r#"[[javascript]]
1595
1596Regular javascript here.
1597
1598[[JavaScript|display text]]"#;
1599
1600        let ctx = create_context(content);
1601        let result = rule.check(&ctx).unwrap();
1602
1603        // Line 1: [[javascript]] - should be flagged (WikiLink text)
1604        // Line 3: standalone javascript - should be flagged
1605        // Line 5: [[JavaScript|display text]] - correct capitalization, no flag
1606        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1607        assert!(
1608            result
1609                .iter()
1610                .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1611        );
1612        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1613    }
1614
1615    #[test]
1616    fn test_url_link_text_not_flagged() {
1617        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1618
1619        // Link text that is itself a URL should not be flagged
1620        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1621
1622[http://github.com/org/repo](http://github.com/org/repo)
1623
1624[www.github.com/org/repo](https://www.github.com/org/repo)"#;
1625
1626        let ctx = create_context(content);
1627        let result = rule.check(&ctx).unwrap();
1628
1629        assert!(
1630            result.is_empty(),
1631            "URL-like link text should not be flagged, got: {result:?}"
1632        );
1633    }
1634
1635    #[test]
1636    fn test_url_link_text_with_leading_space_not_flagged() {
1637        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1638
1639        // Leading/trailing whitespace in link text should be trimmed before URL check
1640        let content = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
1641
1642        let ctx = create_context(content);
1643        let result = rule.check(&ctx).unwrap();
1644
1645        assert!(
1646            result.is_empty(),
1647            "URL-like link text with leading space should not be flagged, got: {result:?}"
1648        );
1649    }
1650
1651    #[test]
1652    fn test_url_link_text_uppercase_scheme_not_flagged() {
1653        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1654
1655        let content = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
1656
1657        let ctx = create_context(content);
1658        let result = rule.check(&ctx).unwrap();
1659
1660        assert!(
1661            result.is_empty(),
1662            "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
1663        );
1664    }
1665
1666    #[test]
1667    fn test_non_url_link_text_still_flagged() {
1668        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1669
1670        // Link text that is NOT a URL should still be flagged
1671        let content = r#"[github.com/org/repo](https://github.com/org/repo)
1672
1673[Visit github](https://github.com/org/repo)
1674
1675[//github.com/org/repo](//github.com/org/repo)
1676
1677[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
1678
1679        let ctx = create_context(content);
1680        let result = rule.check(&ctx).unwrap();
1681
1682        assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
1683        assert!(result.iter().any(|w| w.line == 1)); // github.com (no protocol)
1684        assert!(result.iter().any(|w| w.line == 3)); // Visit github
1685        assert!(result.iter().any(|w| w.line == 5)); // //github.com (protocol-relative)
1686        assert!(result.iter().any(|w| w.line == 7)); // ftp://github.com
1687    }
1688
1689    #[test]
1690    fn test_url_link_text_fix_not_applied() {
1691        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1692
1693        let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";
1694
1695        let ctx = create_context(content);
1696        let result = rule.fix(&ctx).unwrap();
1697
1698        assert_eq!(result, content, "Fix should not modify URL-like link text");
1699    }
1700
1701    #[test]
1702    fn test_mixed_url_and_regular_link_text() {
1703        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1704
1705        // Mix of URL link text (should skip) and regular text (should flag)
1706        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1707
1708Visit [github documentation](https://github.com/docs) for details.
1709
1710[www.github.com/pricing](https://www.github.com/pricing)"#;
1711
1712        let ctx = create_context(content);
1713        let result = rule.check(&ctx).unwrap();
1714
1715        // Only line 3 should be flagged ("github documentation" is not a URL)
1716        assert_eq!(
1717            result.len(),
1718            1,
1719            "Only non-URL link text should be flagged, got: {result:?}"
1720        );
1721        assert_eq!(result[0].line, 3);
1722    }
1723
1724    #[test]
1725    fn test_html_attribute_values_not_flagged() {
1726        // Matches inside HTML tag attributes (between `<` and `>`) are not flagged.
1727        // Attribute values are not prose — they hold URLs, class names, data values, etc.
1728        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1729        let content = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
1730        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1731        let result = rule.check(&ctx).unwrap();
1732
1733        // Nothing on line 5 should be flagged — everything is inside the `<img ...>` tag
1734        let line5_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
1735        assert!(
1736            line5_violations.is_empty(),
1737            "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
1738        );
1739
1740        // Plain text on line 3 is still flagged
1741        let line3_violations: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1742        assert_eq!(line3_violations.len(), 1, "Plain 'test' on line 3 should be flagged");
1743    }
1744
1745    #[test]
1746    fn test_html_text_content_still_flagged() {
1747        // Text between HTML tags (not inside `<...>`) is still checked.
1748        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1749        let content = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
1750        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1751        let result = rule.check(&ctx).unwrap();
1752
1753        // "example.test" in the href attribute → not flagged (inside `<...>`)
1754        // "test link" in the anchor text → flagged (between `>` and `<`)
1755        assert_eq!(
1756            result.len(),
1757            1,
1758            "Should flag only 'test' in anchor text, not in href: {result:?}"
1759        );
1760        assert_eq!(result[0].column, 37, "Should flag col 37 ('test link' in anchor text)");
1761    }
1762
1763    #[test]
1764    fn test_html_attribute_various_not_flagged() {
1765        // All attribute types are ignored: src, href, alt, class, data-*, title, etc.
1766        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1767        let content = concat!(
1768            "# Heading\n\n",
1769            "<img src=\"test.png\" alt=\"test image\">\n",
1770            "<span class=\"test-class\" data-test=\"value\">test content</span>\n",
1771        );
1772        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1773        let result = rule.check(&ctx).unwrap();
1774
1775        // Only "test content" (between tags on line 4) should be flagged
1776        assert_eq!(
1777            result.len(),
1778            1,
1779            "Should flag only 'test content' between tags: {result:?}"
1780        );
1781        assert_eq!(result[0].line, 4);
1782    }
1783
1784    #[test]
1785    fn test_plain_text_underscore_boundary_unchanged() {
1786        // Plain text (outside HTML tags) still uses original word boundary semantics where
1787        // underscore is a boundary character, matching markdownlint's behavior via AST splitting.
1788        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1789        let content = "# Heading\n\ntest_image is here and just_test ends here\n";
1790        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1791        let result = rule.check(&ctx).unwrap();
1792
1793        // Both "test_image" (test at start) and "just_test" (test at end) are flagged
1794        // because in plain text, "_" is a word boundary
1795        assert_eq!(
1796            result.len(),
1797            2,
1798            "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
1799        );
1800        let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
1801        assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
1802        assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
1803    }
1804
1805    #[test]
1806    fn test_frontmatter_yaml_keys_not_flagged() {
1807        // YAML keys in frontmatter should NOT be checked for proper name violations.
1808        // Only values should be checked.
1809        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1810
1811        let content = "---\ntitle: Heading\ntest: Some Test value\n---\n\nTest\n";
1812        let ctx = create_context(content);
1813        let result = rule.check(&ctx).unwrap();
1814
1815        // "test" in the YAML key (line 3) should NOT be flagged
1816        // "Test" in the YAML value (line 3) is correct capitalization, no flag
1817        // "Test" in body (line 6) is correct capitalization, no flag
1818        assert!(
1819            result.is_empty(),
1820            "Should not flag YAML keys or correctly capitalized values: {result:?}"
1821        );
1822    }
1823
1824    #[test]
1825    fn test_frontmatter_yaml_values_flagged() {
1826        // Incorrectly capitalized names in YAML values should be flagged.
1827        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1828
1829        let content = "---\ntitle: Heading\nkey: a test value\n---\n\nTest\n";
1830        let ctx = create_context(content);
1831        let result = rule.check(&ctx).unwrap();
1832
1833        // "test" in the YAML value (line 3) SHOULD be flagged
1834        assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");
1835        assert_eq!(result[0].line, 3);
1836        assert_eq!(result[0].column, 8); // "key: a " = 7 chars, then "test" at column 8
1837    }
1838
1839    #[test]
1840    fn test_frontmatter_key_matches_name_not_flagged() {
1841        // A YAML key that happens to match a configured name should NOT be flagged.
1842        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1843
1844        let content = "---\ntest: other value\n---\n\nBody text\n";
1845        let ctx = create_context(content);
1846        let result = rule.check(&ctx).unwrap();
1847
1848        assert!(
1849            result.is_empty(),
1850            "Should not flag YAML key that matches configured name: {result:?}"
1851        );
1852    }
1853
1854    #[test]
1855    fn test_frontmatter_empty_value_not_flagged() {
1856        // YAML key with no value should be skipped entirely.
1857        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1858
1859        let content = "---\ntest:\ntest: \n---\n\nBody text\n";
1860        let ctx = create_context(content);
1861        let result = rule.check(&ctx).unwrap();
1862
1863        assert!(
1864            result.is_empty(),
1865            "Should not flag YAML keys with empty values: {result:?}"
1866        );
1867    }
1868
1869    #[test]
1870    fn test_frontmatter_nested_yaml_key_not_flagged() {
1871        // Nested/indented YAML keys should also be skipped.
1872        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1873
1874        let content = "---\nparent:\n  test: nested value\n---\n\nBody text\n";
1875        let ctx = create_context(content);
1876        let result = rule.check(&ctx).unwrap();
1877
1878        // "test" as a nested key should NOT be flagged
1879        assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
1880    }
1881
1882    #[test]
1883    fn test_frontmatter_list_items_checked() {
1884        // YAML list items are values and should be checked for proper names.
1885        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1886
1887        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
1888        let ctx = create_context(content);
1889        let result = rule.check(&ctx).unwrap();
1890
1891        // "test" as a list item value SHOULD be flagged
1892        assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
1893        assert_eq!(result[0].line, 3);
1894    }
1895
1896    #[test]
1897    fn test_frontmatter_value_with_multiple_colons() {
1898        // For "key: value: more", key is before first colon.
1899        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1900
1901        let content = "---\ntest: description: a test thing\n---\n\nBody text\n";
1902        let ctx = create_context(content);
1903        let result = rule.check(&ctx).unwrap();
1904
1905        // "test" as key should NOT be flagged
1906        // "test" in value portion ("description: a test thing") SHOULD be flagged
1907        assert_eq!(
1908            result.len(),
1909            1,
1910            "Should flag 'test' in value after first colon: {result:?}"
1911        );
1912        assert_eq!(result[0].line, 2);
1913        assert!(result[0].column > 6, "Violation column should be in value portion");
1914    }
1915
1916    #[test]
1917    fn test_frontmatter_does_not_affect_body() {
1918        // Body text after frontmatter should still be fully checked.
1919        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1920
1921        let content = "---\ntitle: Heading\n---\n\ntest should be flagged here\n";
1922        let ctx = create_context(content);
1923        let result = rule.check(&ctx).unwrap();
1924
1925        assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
1926        assert_eq!(result[0].line, 5);
1927    }
1928
1929    #[test]
1930    fn test_frontmatter_fix_corrects_values_preserves_keys() {
1931        // Fix should correct YAML values but preserve keys.
1932        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1933
1934        let content = "---\ntest: a test value\n---\n\ntest here\n";
1935        let ctx = create_context(content);
1936        let fixed = rule.fix(&ctx).unwrap();
1937
1938        // Key "test" should remain lowercase; value "test" should become "Test"
1939        assert_eq!(fixed, "---\ntest: a Test value\n---\n\nTest here\n");
1940    }
1941
1942    #[test]
1943    fn test_frontmatter_multiword_value_flagged() {
1944        // Multiple proper names in a single YAML value should all be flagged.
1945        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1946
1947        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
1948        let ctx = create_context(content);
1949        let result = rule.check(&ctx).unwrap();
1950
1951        assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
1952        assert!(result.iter().all(|w| w.line == 2));
1953    }
1954
1955    #[test]
1956    fn test_frontmatter_yaml_comments_not_checked() {
1957        // YAML comments inside frontmatter should be skipped entirely.
1958        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1959
1960        let content = "---\n# test comment\ntitle: Heading\n---\n\nBody text\n";
1961        let ctx = create_context(content);
1962        let result = rule.check(&ctx).unwrap();
1963
1964        assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
1965    }
1966
1967    #[test]
1968    fn test_frontmatter_delimiters_not_checked() {
1969        // Frontmatter delimiter lines (--- or +++) should never be checked.
1970        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1971
1972        let content = "---\ntitle: Heading\n---\n\ntest here\n";
1973        let ctx = create_context(content);
1974        let result = rule.check(&ctx).unwrap();
1975
1976        // Only the body "test" on line 5 should be flagged
1977        assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
1978        assert_eq!(result[0].line, 5);
1979    }
1980
1981    #[test]
1982    fn test_frontmatter_continuation_lines_checked() {
1983        // Continuation lines (indented, no colon) are value content and should be checked.
1984        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1985
1986        let content = "---\ndescription: >\n  a test value\n  continued here\n---\n\nBody\n";
1987        let ctx = create_context(content);
1988        let result = rule.check(&ctx).unwrap();
1989
1990        // "test" on the continuation line should be flagged
1991        assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
1992        assert_eq!(result[0].line, 3);
1993    }
1994
1995    #[test]
1996    fn test_frontmatter_quoted_values_checked() {
1997        // Quoted YAML values should have their content checked (inside the quotes).
1998        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1999
2000        let content = "---\ntitle: \"a test title\"\n---\n\nBody\n";
2001        let ctx = create_context(content);
2002        let result = rule.check(&ctx).unwrap();
2003
2004        assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
2005        assert_eq!(result[0].line, 2);
2006    }
2007
2008    #[test]
2009    fn test_frontmatter_single_quoted_values_checked() {
2010        // Single-quoted YAML values should have their content checked.
2011        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2012
2013        let content = "---\ntitle: 'a test title'\n---\n\nBody\n";
2014        let ctx = create_context(content);
2015        let result = rule.check(&ctx).unwrap();
2016
2017        assert_eq!(
2018            result.len(),
2019            1,
2020            "Should flag 'test' in single-quoted YAML value: {result:?}"
2021        );
2022        assert_eq!(result[0].line, 2);
2023    }
2024
2025    #[test]
2026    fn test_frontmatter_fix_multiword_values() {
2027        // Fix should correct all proper names in frontmatter values.
2028        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
2029
2030        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
2031        let ctx = create_context(content);
2032        let fixed = rule.fix(&ctx).unwrap();
2033
2034        assert_eq!(
2035            fixed,
2036            "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n"
2037        );
2038    }
2039
2040    #[test]
2041    fn test_frontmatter_fix_preserves_yaml_structure() {
2042        // Fix should preserve YAML structure while correcting values.
2043        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2044
2045        let content = "---\ntags:\n  - test\n  - other\ntitle: a test doc\n---\n\ntest body\n";
2046        let ctx = create_context(content);
2047        let fixed = rule.fix(&ctx).unwrap();
2048
2049        assert_eq!(
2050            fixed,
2051            "---\ntags:\n  - Test\n  - other\ntitle: a Test doc\n---\n\nTest body\n"
2052        );
2053    }
2054
2055    #[test]
2056    fn test_frontmatter_toml_delimiters_not_checked() {
2057        // TOML frontmatter with +++ delimiters should also be handled.
2058        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2059
2060        let content = "+++\ntitle = \"a test title\"\n+++\n\ntest body\n";
2061        let ctx = create_context(content);
2062        let result = rule.check(&ctx).unwrap();
2063
2064        // "title" as TOML key should NOT be flagged
2065        // "test" in TOML quoted value SHOULD be flagged (line 2)
2066        // "test" in body SHOULD be flagged (line 5)
2067        assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");
2068        let fm_violations: Vec<_> = result.iter().filter(|w| w.line == 2).collect();
2069        assert_eq!(fm_violations.len(), 1, "Should flag 'test' in TOML value: {result:?}");
2070        let body_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
2071        assert_eq!(body_violations.len(), 1, "Should flag body 'test': {result:?}");
2072    }
2073
2074    #[test]
2075    fn test_frontmatter_toml_key_not_flagged() {
2076        // TOML keys should NOT be flagged, only values.
2077        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2078
2079        let content = "+++\ntest = \"other value\"\n+++\n\nBody text\n";
2080        let ctx = create_context(content);
2081        let result = rule.check(&ctx).unwrap();
2082
2083        assert!(
2084            result.is_empty(),
2085            "Should not flag TOML key that matches configured name: {result:?}"
2086        );
2087    }
2088
2089    #[test]
2090    fn test_frontmatter_toml_fix_preserves_keys() {
2091        // Fix should correct TOML values but preserve keys.
2092        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2093
2094        let content = "+++\ntest = \"a test value\"\n+++\n\ntest here\n";
2095        let ctx = create_context(content);
2096        let fixed = rule.fix(&ctx).unwrap();
2097
2098        // Key "test" should remain lowercase; value "test" should become "Test"
2099        assert_eq!(fixed, "+++\ntest = \"a Test value\"\n+++\n\nTest here\n");
2100    }
2101
2102    #[test]
2103    fn test_frontmatter_list_item_mapping_key_not_flagged() {
2104        // In "- test: nested value", "test" is a YAML key within a list-item mapping.
2105        // The key should NOT be flagged; only the value should be checked.
2106        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2107
2108        let content = "---\nitems:\n  - test: nested value\n---\n\nBody text\n";
2109        let ctx = create_context(content);
2110        let result = rule.check(&ctx).unwrap();
2111
2112        assert!(
2113            result.is_empty(),
2114            "Should not flag YAML key in list-item mapping: {result:?}"
2115        );
2116    }
2117
2118    #[test]
2119    fn test_frontmatter_list_item_mapping_value_flagged() {
2120        // In "- key: test value", the value portion should be checked.
2121        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2122
2123        let content = "---\nitems:\n  - key: a test value\n---\n\nBody text\n";
2124        let ctx = create_context(content);
2125        let result = rule.check(&ctx).unwrap();
2126
2127        assert_eq!(
2128            result.len(),
2129            1,
2130            "Should flag 'test' in list-item mapping value: {result:?}"
2131        );
2132        assert_eq!(result[0].line, 3);
2133    }
2134
2135    #[test]
2136    fn test_frontmatter_bare_list_item_still_flagged() {
2137        // Bare list items without a colon (e.g., "- test") are values and should be flagged.
2138        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2139
2140        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
2141        let ctx = create_context(content);
2142        let result = rule.check(&ctx).unwrap();
2143
2144        assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
2145        assert_eq!(result[0].line, 3);
2146    }
2147
2148    #[test]
2149    fn test_frontmatter_flow_mapping_not_flagged() {
2150        // Flow mappings like {test: value} contain YAML keys that should not be flagged.
2151        // The entire flow construct should be skipped.
2152        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2153
2154        let content = "---\nflow_map: {test: value, other: test}\n---\n\nBody text\n";
2155        let ctx = create_context(content);
2156        let result = rule.check(&ctx).unwrap();
2157
2158        assert!(
2159            result.is_empty(),
2160            "Should not flag names inside flow mappings: {result:?}"
2161        );
2162    }
2163
2164    #[test]
2165    fn test_frontmatter_flow_sequence_not_flagged() {
2166        // Flow sequences like [test, other] should also be skipped.
2167        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2168
2169        let content = "---\nitems: [test, other, test]\n---\n\nBody text\n";
2170        let ctx = create_context(content);
2171        let result = rule.check(&ctx).unwrap();
2172
2173        assert!(
2174            result.is_empty(),
2175            "Should not flag names inside flow sequences: {result:?}"
2176        );
2177    }
2178
2179    #[test]
2180    fn test_frontmatter_list_item_mapping_fix_preserves_key() {
2181        // Fix should correct values in list-item mappings but preserve keys.
2182        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2183
2184        let content = "---\nitems:\n  - test: a test value\n---\n\ntest here\n";
2185        let ctx = create_context(content);
2186        let fixed = rule.fix(&ctx).unwrap();
2187
2188        // "test" as list-item key should remain lowercase;
2189        // "test" in value portion should become "Test"
2190        assert_eq!(fixed, "---\nitems:\n  - test: a Test value\n---\n\nTest here\n");
2191    }
2192
2193    #[test]
2194    fn test_frontmatter_backtick_code_not_flagged() {
2195        // Names inside backticks in frontmatter should NOT be flagged when code_blocks=false.
2196        let config = MD044Config {
2197            names: vec!["GoodApplication".to_string()],
2198            code_blocks: false,
2199            ..MD044Config::default()
2200        };
2201        let rule = MD044ProperNames::from_config_struct(config);
2202
2203        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2204        let ctx = create_context(content);
2205        let result = rule.check(&ctx).unwrap();
2206
2207        // Neither the frontmatter nor the body backtick-wrapped name should be flagged
2208        assert!(
2209            result.is_empty(),
2210            "Should not flag names inside backticks in frontmatter or body: {result:?}"
2211        );
2212    }
2213
2214    #[test]
2215    fn test_frontmatter_unquoted_backtick_code_not_flagged() {
2216        // Exact case from issue #513: unquoted YAML frontmatter with backticks
2217        let config = MD044Config {
2218            names: vec!["GoodApplication".to_string()],
2219            code_blocks: false,
2220            ..MD044Config::default()
2221        };
2222        let rule = MD044ProperNames::from_config_struct(config);
2223
2224        let content = "---\ntitle: `goodapplication` CLI\n---\n\nIntroductory `goodapplication` CLI text.\n";
2225        let ctx = create_context(content);
2226        let result = rule.check(&ctx).unwrap();
2227
2228        assert!(
2229            result.is_empty(),
2230            "Should not flag names inside backticks in unquoted YAML frontmatter: {result:?}"
2231        );
2232    }
2233
2234    #[test]
2235    fn test_frontmatter_bare_name_still_flagged_with_backtick_nearby() {
2236        // Names outside backticks in frontmatter should still be flagged.
2237        let config = MD044Config {
2238            names: vec!["GoodApplication".to_string()],
2239            code_blocks: false,
2240            ..MD044Config::default()
2241        };
2242        let rule = MD044ProperNames::from_config_struct(config);
2243
2244        let content = "---\ntitle: goodapplication `goodapplication` CLI\n---\n\nBody\n";
2245        let ctx = create_context(content);
2246        let result = rule.check(&ctx).unwrap();
2247
2248        // Only the bare "goodapplication" (before backticks) should be flagged
2249        assert_eq!(
2250            result.len(),
2251            1,
2252            "Should flag bare name but not backtick-wrapped name: {result:?}"
2253        );
2254        assert_eq!(result[0].line, 2);
2255        assert_eq!(result[0].column, 8); // "title: " = 7 chars, name at column 8
2256    }
2257
2258    #[test]
2259    fn test_frontmatter_backtick_code_with_code_blocks_true() {
2260        // When code_blocks=true, names inside backticks ARE checked.
2261        let config = MD044Config {
2262            names: vec!["GoodApplication".to_string()],
2263            code_blocks: true,
2264            ..MD044Config::default()
2265        };
2266        let rule = MD044ProperNames::from_config_struct(config);
2267
2268        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nBody\n";
2269        let ctx = create_context(content);
2270        let result = rule.check(&ctx).unwrap();
2271
2272        // With code_blocks=true, backtick-wrapped name SHOULD be flagged
2273        assert_eq!(
2274            result.len(),
2275            1,
2276            "Should flag backtick-wrapped name when code_blocks=true: {result:?}"
2277        );
2278        assert_eq!(result[0].line, 2);
2279    }
2280
2281    #[test]
2282    fn test_frontmatter_fix_preserves_backtick_code() {
2283        // Fix should NOT change names inside backticks in frontmatter.
2284        let config = MD044Config {
2285            names: vec!["GoodApplication".to_string()],
2286            code_blocks: false,
2287            ..MD044Config::default()
2288        };
2289        let rule = MD044ProperNames::from_config_struct(config);
2290
2291        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2292        let ctx = create_context(content);
2293        let fixed = rule.fix(&ctx).unwrap();
2294
2295        // Neither backtick-wrapped occurrence should be changed
2296        assert_eq!(
2297            fixed, content,
2298            "Fix should not modify names inside backticks in frontmatter"
2299        );
2300    }
2301
2302    // --- Angle-bracket URL tests (issue #457) ---
2303
2304    #[test]
2305    fn test_angle_bracket_url_in_html_comment_not_flagged() {
2306        // Angle-bracket URLs inside HTML comments should be skipped
2307        let config = MD044Config {
2308            names: vec!["Test".to_string()],
2309            ..MD044Config::default()
2310        };
2311        let rule = MD044ProperNames::from_config_struct(config);
2312
2313        let content = "---\ntitle: Level 1 heading\n---\n\n<https://www.example.test>\n\n<!-- This is a Test https://www.example.test -->\n<!-- This is a Test <https://www.example.test> -->\n";
2314        let ctx = create_context(content);
2315        let result = rule.check(&ctx).unwrap();
2316
2317        // Line 7: "Test" in comment prose before bare URL -- already correct capitalization
2318        // Line 7: "test" in bare URL (not in angle brackets) -- but "test" is in URL domain, not prose.
2319        //   However, .example.test has "test" at a word boundary (after '.'), so it IS flagged.
2320        // Line 8: "Test" in comment prose -- correct capitalization, not flagged
2321        // Line 8: "test" in <https://www.example.test> -- inside angle-bracket URL, NOT flagged
2322
2323        // The key assertion: line 8's angle-bracket URL should NOT produce a warning
2324        let line8_warnings: Vec<_> = result.iter().filter(|w| w.line == 8).collect();
2325        assert!(
2326            line8_warnings.is_empty(),
2327            "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
2328        );
2329    }
2330
2331    #[test]
2332    fn test_bare_url_in_html_comment_still_flagged() {
2333        // Bare URLs (not in angle brackets) inside HTML comments should still be checked
2334        let config = MD044Config {
2335            names: vec!["Test".to_string()],
2336            ..MD044Config::default()
2337        };
2338        let rule = MD044ProperNames::from_config_struct(config);
2339
2340        let content = "<!-- This is a test https://www.example.test -->\n";
2341        let ctx = create_context(content);
2342        let result = rule.check(&ctx).unwrap();
2343
2344        // "test" appears as prose text before URL and also in the bare URL domain
2345        // At minimum, the prose "test" should be flagged
2346        assert!(
2347            !result.is_empty(),
2348            "Should flag 'test' in prose text of HTML comment with bare URL"
2349        );
2350    }
2351
2352    #[test]
2353    fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
2354        // Angle-bracket URLs in regular markdown are already handled by the link parser,
2355        // but the angle-bracket check provides a safety net
2356        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2357
2358        let content = "<https://www.example.test>\n";
2359        let ctx = create_context(content);
2360        let result = rule.check(&ctx).unwrap();
2361
2362        assert!(
2363            result.is_empty(),
2364            "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
2365        );
2366    }
2367
2368    #[test]
2369    fn test_multiple_angle_bracket_urls_in_one_comment() {
2370        let config = MD044Config {
2371            names: vec!["Test".to_string()],
2372            ..MD044Config::default()
2373        };
2374        let rule = MD044ProperNames::from_config_struct(config);
2375
2376        let content = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
2377        let ctx = create_context(content);
2378        let result = rule.check(&ctx).unwrap();
2379
2380        // Both URLs are inside angle brackets, so "test" inside them should NOT be flagged
2381        assert!(
2382            result.is_empty(),
2383            "Should not flag names inside multiple angle-bracket URLs: {result:?}"
2384        );
2385    }
2386
2387    #[test]
2388    fn test_angle_bracket_non_url_still_flagged() {
2389        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2390        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2391        assert!(
2392            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2393            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2394        );
2395    }
2396
2397    #[test]
2398    fn test_angle_bracket_mailto_url_not_flagged() {
2399        let config = MD044Config {
2400            names: vec!["Test".to_string()],
2401            ..MD044Config::default()
2402        };
2403        let rule = MD044ProperNames::from_config_struct(config);
2404
2405        let content = "<!-- Contact <mailto:test@example.com> for help -->\n";
2406        let ctx = create_context(content);
2407        let result = rule.check(&ctx).unwrap();
2408
2409        assert!(
2410            result.is_empty(),
2411            "Should not flag names inside angle-bracket mailto URLs: {result:?}"
2412        );
2413    }
2414
2415    #[test]
2416    fn test_angle_bracket_ftp_url_not_flagged() {
2417        let config = MD044Config {
2418            names: vec!["Test".to_string()],
2419            ..MD044Config::default()
2420        };
2421        let rule = MD044ProperNames::from_config_struct(config);
2422
2423        let content = "<!-- Download from <ftp://test.example.com/file> -->\n";
2424        let ctx = create_context(content);
2425        let result = rule.check(&ctx).unwrap();
2426
2427        assert!(
2428            result.is_empty(),
2429            "Should not flag names inside angle-bracket FTP URLs: {result:?}"
2430        );
2431    }
2432
2433    #[test]
2434    fn test_angle_bracket_url_fix_preserves_url() {
2435        // Fix should not modify text inside angle-bracket URLs
2436        let config = MD044Config {
2437            names: vec!["Test".to_string()],
2438            ..MD044Config::default()
2439        };
2440        let rule = MD044ProperNames::from_config_struct(config);
2441
2442        let content = "<!-- test text <https://www.example.test> -->\n";
2443        let ctx = create_context(content);
2444        let fixed = rule.fix(&ctx).unwrap();
2445
2446        // "test" in prose should be fixed, URL should be preserved
2447        assert!(
2448            fixed.contains("<https://www.example.test>"),
2449            "Fix should preserve angle-bracket URLs: {fixed}"
2450        );
2451        assert!(
2452            fixed.contains("Test text"),
2453            "Fix should correct prose 'test' to 'Test': {fixed}"
2454        );
2455    }
2456
2457    #[test]
2458    fn test_is_in_angle_bracket_url_helper() {
2459        // Direct tests of the helper function
2460        let line = "text <https://example.test> more text";
2461
2462        // Inside the URL
2463        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 5)); // '<'
2464        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 6)); // 'h'
2465        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 15)); // middle of URL
2466        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 26)); // '>'
2467
2468        // Outside the URL
2469        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 0)); // 't' at start
2470        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 4)); // space before '<'
2471        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 27)); // space after '>'
2472
2473        // Non-URL angle brackets
2474        assert!(!MD044ProperNames::is_in_angle_bracket_url("<notaurl>", 1));
2475
2476        // mailto scheme
2477        assert!(MD044ProperNames::is_in_angle_bracket_url(
2478            "<mailto:test@example.com>",
2479            10
2480        ));
2481
2482        // ftp scheme
2483        assert!(MD044ProperNames::is_in_angle_bracket_url(
2484            "<ftp://test.example.com>",
2485            10
2486        ));
2487    }
2488
2489    #[test]
2490    fn test_is_in_angle_bracket_url_uppercase_scheme() {
2491        // RFC 3986: URI schemes are case-insensitive
2492        assert!(MD044ProperNames::is_in_angle_bracket_url(
2493            "<HTTPS://test.example.com>",
2494            10
2495        ));
2496        assert!(MD044ProperNames::is_in_angle_bracket_url(
2497            "<Http://test.example.com>",
2498            10
2499        ));
2500    }
2501
2502    #[test]
2503    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2504        // ssh scheme
2505        assert!(MD044ProperNames::is_in_angle_bracket_url(
2506            "<ssh://test@example.com>",
2507            10
2508        ));
2509        // file scheme
2510        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2511        // data scheme (no authority, just colon)
2512        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2513    }
2514
2515    #[test]
2516    fn test_is_in_angle_bracket_url_unclosed() {
2517        // Unclosed angle bracket should NOT match
2518        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2519            "<https://test.example.com",
2520            10
2521        ));
2522    }
2523
2524    #[test]
2525    fn test_vale_inline_config_comments_not_flagged() {
2526        let config = MD044Config {
2527            names: vec!["Vale".to_string(), "JavaScript".to_string()],
2528            ..MD044Config::default()
2529        };
2530        let rule = MD044ProperNames::from_config_struct(config);
2531
2532        let content = "\
2533<!-- vale off -->
2534Some javascript text here.
2535<!-- vale on -->
2536<!-- vale Style.Rule = NO -->
2537More javascript text.
2538<!-- vale Style.Rule = YES -->
2539<!-- vale JavaScript.Grammar = NO -->
2540";
2541        let ctx = create_context(content);
2542        let result = rule.check(&ctx).unwrap();
2543
2544        // Only the body text lines (2, 5) should be flagged for "javascript"
2545        assert_eq!(result.len(), 2, "Should only flag body lines, not Vale config comments");
2546        assert_eq!(result[0].line, 2);
2547        assert_eq!(result[1].line, 5);
2548    }
2549
2550    #[test]
2551    fn test_remark_lint_inline_config_comments_not_flagged() {
2552        let config = MD044Config {
2553            names: vec!["JavaScript".to_string()],
2554            ..MD044Config::default()
2555        };
2556        let rule = MD044ProperNames::from_config_struct(config);
2557
2558        let content = "\
2559<!-- lint disable remark-lint-some-rule -->
2560Some javascript text here.
2561<!-- lint enable remark-lint-some-rule -->
2562<!-- lint ignore remark-lint-some-rule -->
2563More javascript text.
2564";
2565        let ctx = create_context(content);
2566        let result = rule.check(&ctx).unwrap();
2567
2568        assert_eq!(
2569            result.len(),
2570            2,
2571            "Should only flag body lines, not remark-lint config comments"
2572        );
2573        assert_eq!(result[0].line, 2);
2574        assert_eq!(result[1].line, 5);
2575    }
2576
2577    #[test]
2578    fn test_fix_does_not_modify_vale_remark_lint_comments() {
2579        let config = MD044Config {
2580            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2581            ..MD044Config::default()
2582        };
2583        let rule = MD044ProperNames::from_config_struct(config);
2584
2585        let content = "\
2586<!-- vale off -->
2587Some javascript text.
2588<!-- vale on -->
2589<!-- lint disable remark-lint-some-rule -->
2590More javascript text.
2591<!-- lint enable remark-lint-some-rule -->
2592";
2593        let ctx = create_context(content);
2594        let fixed = rule.fix(&ctx).unwrap();
2595
2596        // Config directive lines must be preserved unchanged
2597        assert!(fixed.contains("<!-- vale off -->"));
2598        assert!(fixed.contains("<!-- vale on -->"));
2599        assert!(fixed.contains("<!-- lint disable remark-lint-some-rule -->"));
2600        assert!(fixed.contains("<!-- lint enable remark-lint-some-rule -->"));
2601        // Body text should be fixed
2602        assert!(fixed.contains("Some JavaScript text."));
2603        assert!(fixed.contains("More JavaScript text."));
2604    }
2605
2606    #[test]
2607    fn test_mixed_tool_directives_all_skipped() {
2608        let config = MD044Config {
2609            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2610            ..MD044Config::default()
2611        };
2612        let rule = MD044ProperNames::from_config_struct(config);
2613
2614        let content = "\
2615<!-- rumdl-disable MD044 -->
2616Some javascript text.
2617<!-- markdownlint-disable -->
2618More javascript text.
2619<!-- vale off -->
2620Even more javascript text.
2621<!-- lint disable some-rule -->
2622Final javascript text.
2623<!-- rumdl-enable MD044 -->
2624<!-- markdownlint-enable -->
2625<!-- vale on -->
2626<!-- lint enable some-rule -->
2627";
2628        let ctx = create_context(content);
2629        let result = rule.check(&ctx).unwrap();
2630
2631        // Only body text lines should be flagged (lines 2, 4, 6, 8)
2632        assert_eq!(
2633            result.len(),
2634            4,
2635            "Should only flag body lines, not any tool directive comments"
2636        );
2637        assert_eq!(result[0].line, 2);
2638        assert_eq!(result[1].line, 4);
2639        assert_eq!(result[2].line, 6);
2640        assert_eq!(result[3].line, 8);
2641    }
2642
2643    #[test]
2644    fn test_vale_remark_lint_edge_cases_not_matched() {
2645        let config = MD044Config {
2646            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2647            ..MD044Config::default()
2648        };
2649        let rule = MD044ProperNames::from_config_struct(config);
2650
2651        // These are regular HTML comments, NOT tool directives:
2652        // - "<!-- vale -->" is not a valid Vale directive (no action keyword)
2653        // - "<!-- vale is a tool -->" starts with "vale" but is prose, not a directive
2654        // - "<!-- valedictorian javascript -->" does not start with "<!-- vale "
2655        // - "<!-- linting javascript tips -->" does not start with "<!-- lint "
2656        // - "<!-- vale javascript -->" starts with "vale" but has no action keyword
2657        // - "<!-- lint your javascript code -->" starts with "lint" but has no action keyword
2658        let content = "\
2659<!-- vale -->
2660<!-- vale is a tool for writing -->
2661<!-- valedictorian javascript -->
2662<!-- linting javascript tips -->
2663<!-- vale javascript -->
2664<!-- lint your javascript code -->
2665";
2666        let ctx = create_context(content);
2667        let result = rule.check(&ctx).unwrap();
2668
2669        // Line 1: "<!-- vale -->" contains "vale" (wrong case for "Vale") -> flagged
2670        // Line 2: "<!-- vale is a tool for writing -->" contains "vale" -> flagged
2671        // Line 3: "<!-- valedictorian javascript -->" contains "javascript" -> flagged
2672        // Line 4: "<!-- linting javascript tips -->" contains "javascript" -> flagged
2673        // Line 5: "<!-- vale javascript -->" contains "vale" and "javascript" -> flagged for both
2674        // Line 6: "<!-- lint your javascript code -->" contains "javascript" -> flagged
2675        assert_eq!(
2676            result.len(),
2677            7,
2678            "Should flag proper names in non-directive HTML comments: got {result:?}"
2679        );
2680        assert_eq!(result[0].line, 1); // "vale" in <!-- vale -->
2681        assert_eq!(result[1].line, 2); // "vale" in <!-- vale is a tool -->
2682        assert_eq!(result[2].line, 3); // "javascript" in <!-- valedictorian javascript -->
2683        assert_eq!(result[3].line, 4); // "javascript" in <!-- linting javascript tips -->
2684        assert_eq!(result[4].line, 5); // "vale" in <!-- vale javascript -->
2685        assert_eq!(result[5].line, 5); // "javascript" in <!-- vale javascript -->
2686        assert_eq!(result[6].line, 6); // "javascript" in <!-- lint your javascript code -->
2687    }
2688
2689    #[test]
2690    fn test_vale_style_directives_skipped() {
2691        let config = MD044Config {
2692            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2693            ..MD044Config::default()
2694        };
2695        let rule = MD044ProperNames::from_config_struct(config);
2696
2697        // These ARE valid Vale directives and should be skipped:
2698        let content = "\
2699<!-- vale style = MyStyle -->
2700<!-- vale styles = Style1, Style2 -->
2701<!-- vale MyRule.Name = YES -->
2702<!-- vale MyRule.Name = NO -->
2703Some javascript text.
2704";
2705        let ctx = create_context(content);
2706        let result = rule.check(&ctx).unwrap();
2707
2708        // Only line 5 (body text) should be flagged
2709        assert_eq!(
2710            result.len(),
2711            1,
2712            "Should only flag body lines, not Vale style/rule directives: got {result:?}"
2713        );
2714        assert_eq!(result[0].line, 5);
2715    }
2716
2717    // --- is_in_backtick_code_in_line unit tests ---
2718
2719    #[test]
2720    fn test_backtick_code_single_backticks() {
2721        let line = "hello `world` bye";
2722        // 'w' is at index 7, inside the backtick span (content between backticks at 6 and 12)
2723        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 7));
2724        // 'h' at index 0 is outside
2725        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2726        // 'b' at index 14 is outside
2727        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 14));
2728    }
2729
2730    #[test]
2731    fn test_backtick_code_double_backticks() {
2732        let line = "a ``code`` b";
2733        // 'c' is at index 4, inside ``...``
2734        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2735        // 'a' at index 0 is outside
2736        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2737        // 'b' at index 11 is outside
2738        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 11));
2739    }
2740
2741    #[test]
2742    fn test_backtick_code_unclosed() {
2743        let line = "a `code b";
2744        // No closing backtick, so nothing is a code span
2745        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2746    }
2747
2748    #[test]
2749    fn test_backtick_code_mismatched_count() {
2750        // Single backtick opening, double backtick is not a match
2751        let line = "a `code`` b";
2752        // The single ` at index 2 doesn't match `` at index 7-8
2753        // So 'c' at index 3 is NOT in a code span
2754        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2755    }
2756
2757    #[test]
2758    fn test_backtick_code_multiple_spans() {
2759        let line = "`first` and `second`";
2760        // 'f' at index 1 (inside first span)
2761        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2762        // 'a' at index 8 (between spans)
2763        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 8));
2764        // 's' at index 13 (inside second span)
2765        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 13));
2766    }
2767
2768    #[test]
2769    fn test_backtick_code_on_backtick_boundary() {
2770        let line = "`code`";
2771        // Position 0 is the opening backtick itself, not inside the span
2772        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2773        // Position 5 is the closing backtick, not inside the span
2774        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 5));
2775        // Position 1-4 are inside the span
2776        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2777        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2778    }
2779}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs