rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_fancy_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
11type WarningPosition = (usize, usize, String); // (line, column, found_name)
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
/// Report whether a trimmed line opens with an inline configuration comment
/// written for a linting tool.
///
/// Recognized tools: rumdl, markdownlint, Vale, and remark-lint.
fn is_inline_config_comment(trimmed: &str) -> bool {
    // Openers that identify a configuration comment on their own.
    const DIRECTIVE_PREFIXES: [&str; 8] = [
        "<!-- rumdl-",
        "<!-- markdownlint-",
        "<!-- vale off",
        "<!-- vale on",
        "<!-- vale style",
        "<!-- lint disable ",
        "<!-- lint enable ",
        "<!-- lint ignore ",
    ];

    if DIRECTIVE_PREFIXES.iter().any(|&prefix| trimmed.starts_with(prefix)) {
        return true;
    }

    // Vale assignments (`<!-- vale Key = value -->`) need the " = " separator
    // in addition to the generic Vale opener.
    trimmed.starts_with("<!-- vale ") && trimmed.contains(" = ")
}
80
/// State for rule MD044 (proper-name capitalization).
///
/// Everything except `config` is derived from the configuration when the rule
/// is constructed. Cloning the rule clones the `Arc`, so clones share one
/// result cache.
#[derive(Clone)]
pub struct MD044ProperNames {
    // Rule configuration: the proper names plus the code-block, HTML-element
    // and HTML-comment switches.
    config: MD044Config,
    // Source text of the combined case-insensitive regex covering every name
    // variant; `None` when no names are configured.
    combined_pattern: Option<String>,
    // Precomputed lowercase name variants used for cheap substring pre-checks
    // before the regex is run.
    name_variants: Vec<String>,
    // Violations found so far, keyed by the hash of the checked content.
    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
}
91
92impl MD044ProperNames {
93    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
94        let config = MD044Config {
95            names,
96            code_blocks,
97            html_elements: true, // Default to checking HTML elements
98            html_comments: true, // Default to checking HTML comments
99        };
100        let combined_pattern = Self::create_combined_pattern(&config);
101        let name_variants = Self::build_name_variants(&config);
102        Self {
103            config,
104            combined_pattern,
105            name_variants,
106            content_cache: Arc::new(Mutex::new(HashMap::new())),
107        }
108    }
109
110    // Helper function for consistent ASCII normalization
111    fn ascii_normalize(s: &str) -> String {
112        s.replace(['é', 'è', 'ê', 'ë'], "e")
113            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
114            .replace(['ï', 'î', 'í', 'ì'], "i")
115            .replace(['ü', 'ú', 'ù', 'û'], "u")
116            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
117            .replace('ñ', "n")
118            .replace('ç', "c")
119    }
120
121    pub fn from_config_struct(config: MD044Config) -> Self {
122        let combined_pattern = Self::create_combined_pattern(&config);
123        let name_variants = Self::build_name_variants(&config);
124        Self {
125            config,
126            combined_pattern,
127            name_variants,
128            content_cache: Arc::new(Mutex::new(HashMap::new())),
129        }
130    }
131
132    // Create a combined regex pattern for all proper names
133    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
134        if config.names.is_empty() {
135            return None;
136        }
137
138        // Create patterns for all names and their variations
139        let mut patterns: Vec<String> = config
140            .names
141            .iter()
142            .flat_map(|name| {
143                let mut variations = vec![];
144                let lower_name = name.to_lowercase();
145
146                // Add the lowercase version
147                variations.push(escape_regex(&lower_name));
148
149                // Add version without dots
150                let lower_name_no_dots = lower_name.replace('.', "");
151                if lower_name != lower_name_no_dots {
152                    variations.push(escape_regex(&lower_name_no_dots));
153                }
154
155                // Add ASCII-normalized versions for common accented characters
156                let ascii_normalized = Self::ascii_normalize(&lower_name);
157
158                if ascii_normalized != lower_name {
159                    variations.push(escape_regex(&ascii_normalized));
160
161                    // Also add version without dots
162                    let ascii_no_dots = ascii_normalized.replace('.', "");
163                    if ascii_normalized != ascii_no_dots {
164                        variations.push(escape_regex(&ascii_no_dots));
165                    }
166                }
167
168                variations
169            })
170            .collect();
171
172        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
173        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
174
175        // Combine all patterns into a single regex with capture groups
176        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
177        Some(format!(r"(?i)({})", patterns.join("|")))
178    }
179
180    fn build_name_variants(config: &MD044Config) -> Vec<String> {
181        let mut variants = HashSet::new();
182        for name in &config.names {
183            let lower_name = name.to_lowercase();
184            variants.insert(lower_name.clone());
185
186            let lower_no_dots = lower_name.replace('.', "");
187            if lower_name != lower_no_dots {
188                variants.insert(lower_no_dots);
189            }
190
191            let ascii_normalized = Self::ascii_normalize(&lower_name);
192            if ascii_normalized != lower_name {
193                variants.insert(ascii_normalized.clone());
194
195                let ascii_no_dots = ascii_normalized.replace('.', "");
196                if ascii_normalized != ascii_no_dots {
197                    variants.insert(ascii_no_dots);
198                }
199            }
200        }
201
202        variants.into_iter().collect()
203    }
204
    /// Find every incorrectly capitalized proper name in the document.
    ///
    /// Returns one `(line, column, found_name)` tuple per violation, where
    /// `line` is 1-based, `column` is the match's byte offset within the line
    /// plus one, and `found_name` is the text exactly as written.
    ///
    /// `content_lower` is the pre-computed lowercase version of `content`, passed
    /// in to avoid a redundant allocation. Results are cached by the hash of
    /// `content`, so a repeated call with the same text returns the stored list.
    fn find_name_violations(
        &self,
        content: &str,
        ctx: &crate::lint_context::LintContext,
        content_lower: &str,
    ) -> Vec<WarningPosition> {
        // Nothing to do without names, content, or a compiled pattern source.
        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
            return Vec::new();
        }

        // Cheap substring pre-check: bail out when no name variant occurs anywhere.
        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));

        if !has_potential_matches {
            return Vec::new();
        }

        // Return cached results for this exact content, if any.
        let hash = fast_hash(content);
        {
            // Separate scope so the lock guard is dropped before scanning starts.
            // A poisoned mutex is treated as a cache miss.
            if let Ok(cache) = self.content_cache.lock()
                && let Some(cached) = cache.get(&hash)
            {
                return cached.clone();
            }
        }

        let mut violations = Vec::new();

        // Get the compiled regex from the global cache; a pattern that fails to
        // compile yields no violations.
        let combined_regex = match &self.combined_pattern {
            Some(pattern) => match get_cached_fancy_regex(pattern) {
                Ok(regex) => regex,
                Err(_) => return Vec::new(),
            },
            None => return Vec::new(),
        };

        // Walk the pre-computed per-line info instead of re-splitting the content.
        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
            let line_num = line_idx + 1;
            let line = line_info.content(ctx.content);

            // Skip code fence lines (```language or ~~~language), regardless of
            // the code_blocks setting.
            let trimmed = line.trim_start();
            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
                continue;
            }

            // Skip if in code block (when code_blocks = false)
            if !self.config.code_blocks && line_info.in_code_block {
                continue;
            }

            // Skip if in HTML block (when html_elements = false)
            if !self.config.html_elements && line_info.in_html_block {
                continue;
            }

            // Skip HTML comments (when html_comments = false) using the pre-computed line flag
            if !self.config.html_comments && line_info.in_html_comment {
                continue;
            }

            // Skip JSX expressions and MDX comments (MDX flavor)
            if line_info.in_jsx_expression || line_info.in_mdx_comment {
                continue;
            }

            // Skip Obsidian comments (Obsidian flavor)
            if line_info.in_obsidian_comment {
                continue;
            }

            // For frontmatter lines, determine the offset where checkable value content starts.
            // YAML keys should not be checked against proper names - only values.
            // `usize::MAX` is the sentinel for "skip this line entirely".
            let fm_value_offset = if line_info.in_front_matter {
                Self::frontmatter_value_offset(line)
            } else {
                0
            };
            if fm_value_offset == usize::MAX {
                continue;
            }

            // Skip inline config comments (rumdl, markdownlint, Vale, remark-lint directives)
            if is_inline_config_comment(trimmed) {
                continue;
            }

            // Per-line pre-check: skip lines that contain no name variant at all.
            let line_lower = line.to_lowercase();
            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));

            if !has_line_matches {
                continue;
            }

            // Use the combined regex to find all matches in one pass
            for cap_result in combined_regex.find_iter(line) {
                match cap_result {
                    Ok(cap) => {
                        let found_name = &line[cap.start()..cap.end()];

                        // Byte offsets of the match within this line.
                        let start_pos = cap.start();
                        let end_pos = cap.end();

                        // Skip matches in the key portion of frontmatter lines
                        if start_pos < fm_value_offset {
                            continue;
                        }

                        // Skip matches inside HTML tag attributes (handles multi-line tags).
                        // `byte_pos` is the match start as a document-wide byte offset.
                        let byte_pos = line_info.byte_offset + start_pos;
                        if ctx.is_in_html_tag(byte_pos) {
                            continue;
                        }

                        // Check word boundaries manually for Unicode support; the
                        // regex itself carries no boundary assertions.
                        if !Self::is_at_word_boundary(line, start_pos, true)
                            || !Self::is_at_word_boundary(line, end_pos, false)
                        {
                            continue; // Not at word boundary
                        }

                        // Skip if in inline code when code_blocks is false
                        if !self.config.code_blocks {
                            if ctx.is_in_code_block_or_span(byte_pos) {
                                continue;
                            }
                            // pulldown-cmark doesn't parse markdown syntax inside HTML
                            // comments or HTML blocks, so backtick-wrapped text isn't
                            // detected by is_in_code_block_or_span. Check directly.
                            if (line_info.in_html_comment || line_info.in_html_block)
                                && Self::is_in_backtick_code_in_line(line, start_pos)
                            {
                                continue;
                            }
                        }

                        // Skip if in link URL or reference definition
                        if Self::is_in_link(ctx, byte_pos) {
                            continue;
                        }

                        // Skip if inside an angle-bracket URL (e.g., <https://...>)
                        // The link parser skips autolinks inside HTML comments,
                        // so we detect them directly in the line text.
                        if Self::is_in_angle_bracket_url(line, start_pos) {
                            continue;
                        }

                        // Find which proper name this matches
                        if let Some(proper_name) = self.get_proper_name_for(found_name) {
                            // Only flag if it's not already correct
                            if found_name != proper_name {
                                violations.push((line_num, cap.start() + 1, found_name.to_string()));
                            }
                        }
                    }
                    Err(e) => {
                        // A regex execution error is reported on stderr and that
                        // match is dropped; scanning continues.
                        eprintln!("Regex execution error on line {line_num}: {e}");
                    }
                }
            }
        }

        // Store in cache (ignore if mutex is poisoned)
        if let Ok(mut cache) = self.content_cache.lock() {
            cache.insert(hash, violations.clone());
        }
        violations
    }
382
383    /// Check if a byte position is within a link URL (not link text)
384    ///
385    /// Link text should be checked for proper names, but URLs should be skipped.
386    /// For `[text](url)` - check text, skip url
387    /// For `[text][ref]` - check text, skip reference portion
388    /// For `[[text]]` (WikiLinks) - check text, skip brackets
389    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
390        use pulldown_cmark::LinkType;
391
392        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
393        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
394        if link_idx > 0 {
395            let link = &ctx.links[link_idx - 1];
396            if byte_pos < link.byte_end {
397                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
398                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
399                    link.byte_offset + 2
400                } else {
401                    link.byte_offset + 1
402                };
403                let text_end = text_start + link.text.len();
404
405                // If position is within the text portion, skip only if text is a URL
406                if byte_pos >= text_start && byte_pos < text_end {
407                    return Self::link_text_is_url(&link.text);
408                }
409                // Position is in the URL/reference portion, skip it
410                return true;
411            }
412        }
413
414        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
415        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
416        if image_idx > 0 {
417            let image = &ctx.images[image_idx - 1];
418            if byte_pos < image.byte_end {
419                // Image starts with '![' so alt text starts at byte_offset + 2
420                let alt_start = image.byte_offset + 2;
421                let alt_end = alt_start + image.alt_text.len();
422
423                // If position is within the alt text portion, don't skip
424                if byte_pos >= alt_start && byte_pos < alt_end {
425                    return false;
426                }
427                // Position is in the URL/reference portion, skip it
428                return true;
429            }
430        }
431
432        // Check pre-computed reference definitions
433        ctx.is_in_reference_def(byte_pos)
434    }
435
436    /// Check if link text is a URL that should not have proper name corrections.
437    /// Matches markdownlint behavior: skip text starting with `http://`, `https://`, or `www.`.
438    fn link_text_is_url(text: &str) -> bool {
439        let lower = text.trim().to_ascii_lowercase();
440        lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
441    }
442
443    /// Check if a position within a line falls inside an angle-bracket URL (`<scheme://...>`).
444    ///
445    /// The link parser skips autolinks inside HTML comments, so `ctx.links` won't
446    /// contain them. This function detects angle-bracket URLs directly in the line
447    /// text, covering both HTML comments and regular text as a safety net.
448    fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
449        let bytes = line.as_bytes();
450        let len = bytes.len();
451        let mut i = 0;
452        while i < len {
453            if bytes[i] == b'<' {
454                let after_open = i + 1;
455                // Check for a valid URI scheme per CommonMark autolink spec:
456                // scheme = [a-zA-Z][a-zA-Z0-9+.-]{0,31}
457                // followed by ':'
458                if after_open < len && bytes[after_open].is_ascii_alphabetic() {
459                    let mut s = after_open + 1;
460                    let scheme_max = (after_open + 32).min(len);
461                    while s < scheme_max
462                        && (bytes[s].is_ascii_alphanumeric()
463                            || bytes[s] == b'+'
464                            || bytes[s] == b'-'
465                            || bytes[s] == b'.')
466                    {
467                        s += 1;
468                    }
469                    if s < len && bytes[s] == b':' {
470                        // Valid scheme found; scan for closing '>' with no spaces or '<'
471                        let mut j = s + 1;
472                        let mut found_close = false;
473                        while j < len {
474                            match bytes[j] {
475                                b'>' => {
476                                    found_close = true;
477                                    break;
478                                }
479                                b' ' | b'<' => break,
480                                _ => j += 1,
481                            }
482                        }
483                        if found_close && pos >= i && pos <= j {
484                            return true;
485                        }
486                        if found_close {
487                            i = j + 1;
488                            continue;
489                        }
490                    }
491                }
492            }
493            i += 1;
494        }
495        false
496    }
497
498    /// Check if a position within a line falls inside backtick-delimited code.
499    ///
500    /// pulldown-cmark does not parse markdown syntax inside HTML comments, so
501    /// `ctx.is_in_code_block_or_span` returns false for backtick-wrapped text
502    /// within comments. This function detects backtick code spans directly in
503    /// the line text following CommonMark rules: a code span starts with N
504    /// backticks and ends with exactly N backticks.
505    fn is_in_backtick_code_in_line(line: &str, pos: usize) -> bool {
506        let bytes = line.as_bytes();
507        let len = bytes.len();
508        let mut i = 0;
509        while i < len {
510            if bytes[i] == b'`' {
511                // Count the opening backtick sequence length
512                let open_start = i;
513                while i < len && bytes[i] == b'`' {
514                    i += 1;
515                }
516                let tick_len = i - open_start;
517
518                // Scan forward for a closing sequence of exactly tick_len backticks
519                while i < len {
520                    if bytes[i] == b'`' {
521                        let close_start = i;
522                        while i < len && bytes[i] == b'`' {
523                            i += 1;
524                        }
525                        if i - close_start == tick_len {
526                            // Matched pair found; the code span content is between
527                            // the end of the opening backticks and the start of the
528                            // closing backticks (exclusive of the backticks themselves).
529                            let content_start = open_start + tick_len;
530                            let content_end = close_start;
531                            if pos >= content_start && pos < content_end {
532                                return true;
533                            }
534                            // Continue scanning after this pair
535                            break;
536                        }
537                        // Not the right length; keep scanning
538                    } else {
539                        i += 1;
540                    }
541                }
542            } else {
543                i += 1;
544            }
545        }
546        false
547    }
548
549    // Check if a character is a word boundary (handles Unicode)
550    fn is_word_boundary_char(c: char) -> bool {
551        !c.is_alphanumeric()
552    }
553
554    // Check if position is at a word boundary using byte-level lookups.
555    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
556        if is_start {
557            if pos == 0 {
558                return true;
559            }
560            match content[..pos].chars().next_back() {
561                None => true,
562                Some(c) => Self::is_word_boundary_char(c),
563            }
564        } else {
565            if pos >= content.len() {
566                return true;
567            }
568            match content[pos..].chars().next() {
569                None => true,
570                Some(c) => Self::is_word_boundary_char(c),
571            }
572        }
573    }
574
575    /// For a frontmatter line, return the byte offset where the checkable
576    /// value portion starts. Returns `usize::MAX` if the entire line should be
577    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
578    fn frontmatter_value_offset(line: &str) -> usize {
579        let trimmed = line.trim();
580
581        // Skip frontmatter delimiters and empty lines
582        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
583            return usize::MAX;
584        }
585
586        // Skip YAML comments
587        if trimmed.starts_with('#') {
588            return usize::MAX;
589        }
590
591        // YAML list item: "  - item" or "  - key: value"
592        let stripped = line.trim_start();
593        if let Some(after_dash) = stripped.strip_prefix("- ") {
594            let leading = line.len() - stripped.len();
595            // Check if the list item contains a mapping (e.g., "- key: value")
596            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
597                return result;
598            }
599            // Bare list item value (no colon) - check content after "- "
600            return leading + 2;
601        }
602        if stripped == "-" {
603            return usize::MAX;
604        }
605
606        // Key-value pair with colon separator (YAML): "key: value"
607        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
608            return result;
609        }
610
611        // Key-value pair with equals separator (TOML): "key = value"
612        if let Some(eq_pos) = line.find('=') {
613            let after_eq = eq_pos + 1;
614            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
615                let value_start = after_eq + 1;
616                let value_slice = &line[value_start..];
617                let value_trimmed = value_slice.trim();
618                if value_trimmed.is_empty() {
619                    return usize::MAX;
620                }
621                // For quoted values, skip the opening quote character
622                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
623                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
624                {
625                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
626                    return value_start + quote_offset + 1;
627                }
628                return value_start;
629            }
630            // Equals with no space after or at end of line -> no value to check
631            return usize::MAX;
632        }
633
634        // No separator found - continuation line or bare value, check the whole line
635        0
636    }
637
638    /// Parse a key-value pair using colon separator within `content` that starts
639    /// at `base_offset` in the original line. Returns `Some(offset)` if a colon
640    /// separator is found, `None` if no colon is present.
641    fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
642        let colon_pos = content.find(':')?;
643        let abs_colon = base_offset + colon_pos;
644        let after_colon = abs_colon + 1;
645        if after_colon < line.len() && line.as_bytes()[after_colon] == b' ' {
646            let value_start = after_colon + 1;
647            let value_slice = &line[value_start..];
648            let value_trimmed = value_slice.trim();
649            if value_trimmed.is_empty() {
650                return Some(usize::MAX);
651            }
652            // Skip flow mappings and flow sequences - too complex for heuristic parsing
653            if value_trimmed.starts_with('{') || value_trimmed.starts_with('[') {
654                return Some(usize::MAX);
655            }
656            // For quoted values, skip the opening quote character
657            if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
658                || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
659            {
660                let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
661                return Some(value_start + quote_offset + 1);
662            }
663            return Some(value_start);
664        }
665        // Colon with no space after or at end of line -> no value to check
666        Some(usize::MAX)
667    }
668
669    // Get the proper name that should be used for a found name
670    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
671        let found_lower = found_name.to_lowercase();
672
673        // Iterate through the configured proper names
674        for name in &self.config.names {
675            let lower_name = name.to_lowercase();
676            let lower_name_no_dots = lower_name.replace('.', "");
677
678            // Direct match
679            if found_lower == lower_name || found_lower == lower_name_no_dots {
680                return Some(name.clone());
681            }
682
683            // Check ASCII-normalized version
684            let ascii_normalized = Self::ascii_normalize(&lower_name);
685
686            let ascii_no_dots = ascii_normalized.replace('.', "");
687
688            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
689                return Some(name.clone());
690            }
691        }
692        None
693    }
694}
695
696impl Rule for MD044ProperNames {
    /// Identifier of this rule; also used as the `rule_name` of the warnings
    /// produced by `check`.
    fn name(&self) -> &'static str {
        "MD044"
    }
700
    /// One-line human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Proper names should have the correct capitalization"
    }
704
705    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
706        if self.config.names.is_empty() {
707            return true;
708        }
709        // Quick check if any configured name variants exist (case-insensitive)
710        let content_lower = if ctx.content.is_ascii() {
711            ctx.content.to_ascii_lowercase()
712        } else {
713            ctx.content.to_lowercase()
714        };
715        !self.name_variants.iter().any(|name| content_lower.contains(name))
716    }
717
718    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
719        let content = ctx.content;
720        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
721            return Ok(Vec::new());
722        }
723
724        // Compute lowercase content once and reuse across all checks
725        let content_lower = if content.is_ascii() {
726            content.to_ascii_lowercase()
727        } else {
728            content.to_lowercase()
729        };
730
731        // Early return: use pre-computed name_variants for the quick check
732        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
733
734        if !has_potential_matches {
735            return Ok(Vec::new());
736        }
737
738        let line_index = &ctx.line_index;
739        let violations = self.find_name_violations(content, ctx, &content_lower);
740
741        let warnings = violations
742            .into_iter()
743            .filter_map(|(line, column, found_name)| {
744                self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
745                    rule_name: Some(self.name().to_string()),
746                    line,
747                    column,
748                    end_line: line,
749                    end_column: column + found_name.len(),
750                    message: format!("Proper name '{found_name}' should be '{proper_name}'"),
751                    severity: Severity::Warning,
752                    fix: Some(Fix {
753                        range: line_index.line_col_to_byte_range_with_length(line, column, found_name.len()),
754                        replacement: proper_name,
755                    }),
756                })
757            })
758            .collect();
759
760        Ok(warnings)
761    }
762
763    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
764        let content = ctx.content;
765        if content.is_empty() || self.config.names.is_empty() {
766            return Ok(content.to_string());
767        }
768
769        let content_lower = if content.is_ascii() {
770            content.to_ascii_lowercase()
771        } else {
772            content.to_lowercase()
773        };
774        let violations = self.find_name_violations(content, ctx, &content_lower);
775        if violations.is_empty() {
776            return Ok(content.to_string());
777        }
778
779        // Process lines and build the fixed content
780        let mut fixed_lines = Vec::new();
781
782        // Group violations by line
783        let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
784        for (line_num, col_num, found_name) in violations {
785            violations_by_line
786                .entry(line_num)
787                .or_default()
788                .push((col_num, found_name));
789        }
790
791        // Sort violations within each line in reverse order
792        for violations in violations_by_line.values_mut() {
793            violations.sort_by_key(|b| std::cmp::Reverse(b.0));
794        }
795
796        // Process each line
797        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
798            let line_num = line_idx + 1;
799
800            // Skip lines where this rule is disabled by inline config
801            if ctx.inline_config().is_rule_disabled(self.name(), line_num) {
802                fixed_lines.push(line_info.content(ctx.content).to_string());
803                continue;
804            }
805
806            if let Some(line_violations) = violations_by_line.get(&line_num) {
807                // This line has violations, fix them
808                let mut fixed_line = line_info.content(ctx.content).to_string();
809
810                for (col_num, found_name) in line_violations {
811                    if let Some(proper_name) = self.get_proper_name_for(found_name) {
812                        let start_col = col_num - 1; // Convert to 0-based
813                        let end_col = start_col + found_name.len();
814
815                        if end_col <= fixed_line.len()
816                            && fixed_line.is_char_boundary(start_col)
817                            && fixed_line.is_char_boundary(end_col)
818                        {
819                            fixed_line.replace_range(start_col..end_col, &proper_name);
820                        }
821                    }
822                }
823
824                fixed_lines.push(fixed_line);
825            } else {
826                // No violations on this line, keep it as is
827                fixed_lines.push(line_info.content(ctx.content).to_string());
828            }
829        }
830
831        // Join lines with newlines, preserving the original ending
832        let mut result = fixed_lines.join("\n");
833        if content.ends_with('\n') && !result.ends_with('\n') {
834            result.push('\n');
835        }
836        Ok(result)
837    }
838
839    fn as_any(&self) -> &dyn std::any::Any {
840        self
841    }
842
843    fn default_config_section(&self) -> Option<(String, toml::Value)> {
844        let json_value = serde_json::to_value(&self.config).ok()?;
845        Some((
846            self.name().to_string(),
847            crate::rule_config_serde::json_to_toml_value(&json_value)?,
848        ))
849    }
850
851    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
852    where
853        Self: Sized,
854    {
855        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
856        Box::new(Self::from_config_struct(rule_config))
857    }
858}
859
860#[cfg(test)]
861mod tests {
862    use super::*;
863    use crate::lint_context::LintContext;
864
865    fn create_context(content: &str) -> LintContext<'_> {
866        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
867    }
868
869    #[test]
870    fn test_correctly_capitalized_names() {
871        let rule = MD044ProperNames::new(
872            vec![
873                "JavaScript".to_string(),
874                "TypeScript".to_string(),
875                "Node.js".to_string(),
876            ],
877            true,
878        );
879
880        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
881        let ctx = create_context(content);
882        let result = rule.check(&ctx).unwrap();
883        assert!(result.is_empty(), "Should not flag correctly capitalized names");
884    }
885
886    #[test]
887    fn test_incorrectly_capitalized_names() {
888        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
889
890        let content = "This document uses javascript and typescript incorrectly.";
891        let ctx = create_context(content);
892        let result = rule.check(&ctx).unwrap();
893
894        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
895        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
896        assert_eq!(result[0].line, 1);
897        assert_eq!(result[0].column, 20);
898        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
899        assert_eq!(result[1].line, 1);
900        assert_eq!(result[1].column, 35);
901    }
902
903    #[test]
904    fn test_names_at_beginning_of_sentences() {
905        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
906
907        let content = "javascript is a great language. python is also popular.";
908        let ctx = create_context(content);
909        let result = rule.check(&ctx).unwrap();
910
911        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
912        assert_eq!(result[0].line, 1);
913        assert_eq!(result[0].column, 1);
914        assert_eq!(result[1].line, 1);
915        assert_eq!(result[1].column, 33);
916    }
917
918    #[test]
919    fn test_names_in_code_blocks_checked_by_default() {
920        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
921
922        let content = r#"Here is some text with JavaScript.
923
924```javascript
925// This javascript should be checked
926const lang = "javascript";
927```
928
929But this javascript should be flagged."#;
930
931        let ctx = create_context(content);
932        let result = rule.check(&ctx).unwrap();
933
934        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
935        assert_eq!(result[0].line, 4);
936        assert_eq!(result[1].line, 5);
937        assert_eq!(result[2].line, 8);
938    }
939
940    #[test]
941    fn test_names_in_code_blocks_ignored_when_disabled() {
942        let rule = MD044ProperNames::new(
943            vec!["JavaScript".to_string()],
944            false, // code_blocks = false means skip code blocks
945        );
946
947        let content = r#"```
948javascript in code block
949```"#;
950
951        let ctx = create_context(content);
952        let result = rule.check(&ctx).unwrap();
953
954        assert_eq!(
955            result.len(),
956            0,
957            "Should not flag javascript in code blocks when code_blocks is false"
958        );
959    }
960
961    #[test]
962    fn test_names_in_inline_code_checked_by_default() {
963        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
964
965        let content = "This is `javascript` in inline code and javascript outside.";
966        let ctx = create_context(content);
967        let result = rule.check(&ctx).unwrap();
968
969        // When code_blocks=true, inline code should be checked
970        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
971        assert_eq!(result[0].column, 10); // javascript in inline code
972        assert_eq!(result[1].column, 41); // javascript outside
973    }
974
975    #[test]
976    fn test_multiple_names_in_same_line() {
977        let rule = MD044ProperNames::new(
978            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
979            true,
980        );
981
982        let content = "I use javascript, typescript, and react in my projects.";
983        let ctx = create_context(content);
984        let result = rule.check(&ctx).unwrap();
985
986        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
987        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
988        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
989        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
990    }
991
992    #[test]
993    fn test_case_sensitivity() {
994        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
995
996        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
997        let ctx = create_context(content);
998        let result = rule.check(&ctx).unwrap();
999
1000        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
1001        // JavaScript (correct) should not be flagged
1002        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
1003    }
1004
1005    #[test]
1006    fn test_configuration_with_custom_name_list() {
1007        let config = MD044Config {
1008            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
1009            code_blocks: true,
1010            html_elements: true,
1011            html_comments: true,
1012        };
1013        let rule = MD044ProperNames::from_config_struct(config);
1014
1015        let content = "We use github, gitlab, and devops for our workflow.";
1016        let ctx = create_context(content);
1017        let result = rule.check(&ctx).unwrap();
1018
1019        assert_eq!(result.len(), 3, "Should flag all custom names");
1020        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
1021        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
1022        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
1023    }
1024
1025    #[test]
1026    fn test_empty_configuration() {
1027        let rule = MD044ProperNames::new(vec![], true);
1028
1029        let content = "This has javascript and typescript but no configured names.";
1030        let ctx = create_context(content);
1031        let result = rule.check(&ctx).unwrap();
1032
1033        assert!(result.is_empty(), "Should not flag anything with empty configuration");
1034    }
1035
1036    #[test]
1037    fn test_names_with_special_characters() {
1038        let rule = MD044ProperNames::new(
1039            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
1040            true,
1041        );
1042
1043        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
1044        let ctx = create_context(content);
1045        let result = rule.check(&ctx).unwrap();
1046
1047        // nodejs should match Node.js (dotless variation)
1048        // asp.net should be flagged (wrong case)
1049        // ASP.NET should not be flagged (correct)
1050        // c++ should be flagged
1051        assert_eq!(result.len(), 3, "Should handle special characters correctly");
1052
1053        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
1054        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
1055        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
1056        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
1057    }
1058
1059    #[test]
1060    fn test_word_boundaries() {
1061        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
1062
1063        let content = "JavaScript is not java or script, but Java and Script are separate.";
1064        let ctx = create_context(content);
1065        let result = rule.check(&ctx).unwrap();
1066
1067        // Should only flag lowercase "java" and "script" as separate words
1068        assert_eq!(result.len(), 2, "Should respect word boundaries");
1069        assert!(result.iter().any(|w| w.column == 19)); // "java" position
1070        assert!(result.iter().any(|w| w.column == 27)); // "script" position
1071    }
1072
1073    #[test]
1074    fn test_fix_method() {
1075        let rule = MD044ProperNames::new(
1076            vec![
1077                "JavaScript".to_string(),
1078                "TypeScript".to_string(),
1079                "Node.js".to_string(),
1080            ],
1081            true,
1082        );
1083
1084        let content = "I love javascript, typescript, and nodejs!";
1085        let ctx = create_context(content);
1086        let fixed = rule.fix(&ctx).unwrap();
1087
1088        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
1089    }
1090
1091    #[test]
1092    fn test_fix_multiple_occurrences() {
1093        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
1094
1095        let content = "python is great. I use python daily. PYTHON is powerful.";
1096        let ctx = create_context(content);
1097        let fixed = rule.fix(&ctx).unwrap();
1098
1099        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
1100    }
1101
1102    #[test]
1103    fn test_fix_checks_code_blocks_by_default() {
1104        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1105
1106        let content = r#"I love javascript.
1107
1108```
1109const lang = "javascript";
1110```
1111
1112More javascript here."#;
1113
1114        let ctx = create_context(content);
1115        let fixed = rule.fix(&ctx).unwrap();
1116
1117        let expected = r#"I love JavaScript.
1118
1119```
1120const lang = "JavaScript";
1121```
1122
1123More JavaScript here."#;
1124
1125        assert_eq!(fixed, expected);
1126    }
1127
1128    #[test]
1129    fn test_multiline_content() {
1130        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
1131
1132        let content = r#"First line with rust.
1133Second line with python.
1134Third line with RUST and PYTHON."#;
1135
1136        let ctx = create_context(content);
1137        let result = rule.check(&ctx).unwrap();
1138
1139        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
1140        assert_eq!(result[0].line, 1);
1141        assert_eq!(result[1].line, 2);
1142        assert_eq!(result[2].line, 3);
1143        assert_eq!(result[3].line, 3);
1144    }
1145
1146    #[test]
1147    fn test_default_config() {
1148        let config = MD044Config::default();
1149        assert!(config.names.is_empty());
1150        assert!(!config.code_blocks);
1151        assert!(config.html_elements);
1152        assert!(config.html_comments);
1153    }
1154
1155    #[test]
1156    fn test_default_config_checks_html_comments() {
1157        let config = MD044Config {
1158            names: vec!["JavaScript".to_string()],
1159            ..MD044Config::default()
1160        };
1161        let rule = MD044ProperNames::from_config_struct(config);
1162
1163        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1164        let ctx = create_context(content);
1165        let result = rule.check(&ctx).unwrap();
1166
1167        assert_eq!(result.len(), 1, "Default config should check HTML comments");
1168        assert_eq!(result[0].line, 3);
1169    }
1170
1171    #[test]
1172    fn test_default_config_skips_code_blocks() {
1173        let config = MD044Config {
1174            names: vec!["JavaScript".to_string()],
1175            ..MD044Config::default()
1176        };
1177        let rule = MD044ProperNames::from_config_struct(config);
1178
1179        let content = "# Guide\n\n```\njavascript in code\n```\n";
1180        let ctx = create_context(content);
1181        let result = rule.check(&ctx).unwrap();
1182
1183        assert_eq!(result.len(), 0, "Default config should skip code blocks");
1184    }
1185
1186    #[test]
1187    fn test_standalone_html_comment_checked() {
1188        let config = MD044Config {
1189            names: vec!["Test".to_string()],
1190            ..MD044Config::default()
1191        };
1192        let rule = MD044ProperNames::from_config_struct(config);
1193
1194        let content = "# Heading\n\n<!-- this is a test example -->\n";
1195        let ctx = create_context(content);
1196        let result = rule.check(&ctx).unwrap();
1197
1198        assert_eq!(result.len(), 1, "Should flag proper name in standalone HTML comment");
1199        assert_eq!(result[0].line, 3);
1200    }
1201
1202    #[test]
1203    fn test_inline_config_comments_not_flagged() {
1204        let config = MD044Config {
1205            names: vec!["RUMDL".to_string()],
1206            ..MD044Config::default()
1207        };
1208        let rule = MD044ProperNames::from_config_struct(config);
1209
1210        // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
1211        // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
1212        // but would be suppressed by the linting engine's inline config filtering.
1213        let content = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
1214        let ctx = create_context(content);
1215        let result = rule.check(&ctx).unwrap();
1216
1217        assert_eq!(result.len(), 2, "Should only flag body lines, not config comments");
1218        assert_eq!(result[0].line, 2);
1219        assert_eq!(result[1].line, 5);
1220    }
1221
1222    #[test]
1223    fn test_html_comment_skipped_when_disabled() {
1224        let config = MD044Config {
1225            names: vec!["Test".to_string()],
1226            code_blocks: true,
1227            html_elements: true,
1228            html_comments: false,
1229        };
1230        let rule = MD044ProperNames::from_config_struct(config);
1231
1232        let content = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
1233        let ctx = create_context(content);
1234        let result = rule.check(&ctx).unwrap();
1235
1236        assert_eq!(
1237            result.len(),
1238            1,
1239            "Should only flag 'test' outside HTML comment when html_comments=false"
1240        );
1241        assert_eq!(result[0].line, 5);
1242    }
1243
1244    #[test]
1245    fn test_fix_corrects_html_comment_content() {
1246        let config = MD044Config {
1247            names: vec!["JavaScript".to_string()],
1248            ..MD044Config::default()
1249        };
1250        let rule = MD044ProperNames::from_config_struct(config);
1251
1252        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1253        let ctx = create_context(content);
1254        let fixed = rule.fix(&ctx).unwrap();
1255
1256        assert_eq!(fixed, "# Guide\n\n<!-- JavaScript mentioned here -->\n");
1257    }
1258
1259    #[test]
1260    fn test_fix_does_not_modify_inline_config_comments() {
1261        let config = MD044Config {
1262            names: vec!["RUMDL".to_string()],
1263            ..MD044Config::default()
1264        };
1265        let rule = MD044ProperNames::from_config_struct(config);
1266
1267        let content = "<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n";
1268        let ctx = create_context(content);
1269        let fixed = rule.fix(&ctx).unwrap();
1270
1271        // Config comments should be untouched
1272        assert!(fixed.contains("<!-- rumdl-disable -->"));
1273        assert!(fixed.contains("<!-- rumdl-enable -->"));
1274        // Body text inside disable block should NOT be fixed (rule is disabled)
1275        assert!(
1276            fixed.contains("Some rumdl text."),
1277            "Line inside rumdl-disable block should not be modified by fix()"
1278        );
1279    }
1280
1281    #[test]
1282    fn test_fix_respects_inline_disable_partial() {
1283        let config = MD044Config {
1284            names: vec!["RUMDL".to_string()],
1285            ..MD044Config::default()
1286        };
1287        let rule = MD044ProperNames::from_config_struct(config);
1288
1289        let content =
1290            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
1291        let ctx = create_context(content);
1292        let fixed = rule.fix(&ctx).unwrap();
1293
1294        // Line inside disable block should be preserved
1295        assert!(
1296            fixed.contains("Some rumdl text.\n<!-- rumdl-enable"),
1297            "Line inside disable block should not be modified"
1298        );
1299        // Line outside disable block should be fixed
1300        assert!(
1301            fixed.contains("Some RUMDL text outside."),
1302            "Line outside disable block should be fixed"
1303        );
1304    }
1305
1306    #[test]
1307    fn test_performance_with_many_names() {
1308        let mut names = vec![];
1309        for i in 0..50 {
1310            names.push(format!("ProperName{i}"));
1311        }
1312
1313        let rule = MD044ProperNames::new(names, true);
1314
1315        let content = "This has propername0, propername25, and propername49 incorrectly.";
1316        let ctx = create_context(content);
1317        let result = rule.check(&ctx).unwrap();
1318
1319        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
1320    }
1321
1322    #[test]
1323    fn test_large_name_count_performance() {
1324        // Verify MD044 can handle large numbers of names without regex limitations
1325        // This test confirms that fancy-regex handles large patterns well
1326        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
1327
1328        let rule = MD044ProperNames::new(names, true);
1329
1330        // The combined pattern should be created successfully
1331        assert!(rule.combined_pattern.is_some());
1332
1333        // Should be able to check content without errors
1334        let content = "This has propername0 and propername999 in it.";
1335        let ctx = create_context(content);
1336        let result = rule.check(&ctx).unwrap();
1337
1338        // Should detect both incorrect names
1339        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
1340    }
1341
1342    #[test]
1343    fn test_cache_behavior() {
1344        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1345
1346        let content = "Using javascript here.";
1347        let ctx = create_context(content);
1348
1349        // First check
1350        let result1 = rule.check(&ctx).unwrap();
1351        assert_eq!(result1.len(), 1);
1352
1353        // Second check should use cache
1354        let result2 = rule.check(&ctx).unwrap();
1355        assert_eq!(result2.len(), 1);
1356
1357        // Results should be identical
1358        assert_eq!(result1[0].line, result2[0].line);
1359        assert_eq!(result1[0].column, result2[0].column);
1360    }
1361
1362    #[test]
1363    fn test_html_comments_not_checked_when_disabled() {
1364        let config = MD044Config {
1365            names: vec!["JavaScript".to_string()],
1366            code_blocks: true,    // Check code blocks
1367            html_elements: true,  // Check HTML elements
1368            html_comments: false, // Don't check HTML comments
1369        };
1370        let rule = MD044ProperNames::from_config_struct(config);
1371
1372        let content = r#"Regular javascript here.
1373<!-- This javascript in HTML comment should be ignored -->
1374More javascript outside."#;
1375
1376        let ctx = create_context(content);
1377        let result = rule.check(&ctx).unwrap();
1378
1379        assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
1380        assert_eq!(result[0].line, 1);
1381        assert_eq!(result[1].line, 3);
1382    }
1383
1384    #[test]
1385    fn test_html_comments_checked_when_enabled() {
1386        let config = MD044Config {
1387            names: vec!["JavaScript".to_string()],
1388            code_blocks: true,   // Check code blocks
1389            html_elements: true, // Check HTML elements
1390            html_comments: true, // Check HTML comments
1391        };
1392        let rule = MD044ProperNames::from_config_struct(config);
1393
1394        let content = r#"Regular javascript here.
1395<!-- This javascript in HTML comment should be checked -->
1396More javascript outside."#;
1397
1398        let ctx = create_context(content);
1399        let result = rule.check(&ctx).unwrap();
1400
1401        assert_eq!(
1402            result.len(),
1403            3,
1404            "Should flag all javascript occurrences including in HTML comments"
1405        );
1406    }
1407
1408    #[test]
1409    fn test_multiline_html_comments() {
1410        let config = MD044Config {
1411            names: vec!["Python".to_string(), "JavaScript".to_string()],
1412            code_blocks: true,    // Check code blocks
1413            html_elements: true,  // Check HTML elements
1414            html_comments: false, // Don't check HTML comments
1415        };
1416        let rule = MD044ProperNames::from_config_struct(config);
1417
1418        let content = r#"Regular python here.
1419<!--
1420This is a multiline comment
1421with javascript and python
1422that should be ignored
1423-->
1424More javascript outside."#;
1425
1426        let ctx = create_context(content);
1427        let result = rule.check(&ctx).unwrap();
1428
1429        assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1430        assert_eq!(result[0].line, 1); // python
1431        assert_eq!(result[1].line, 7); // javascript
1432    }
1433
1434    #[test]
1435    fn test_fix_preserves_html_comments_when_disabled() {
1436        let config = MD044Config {
1437            names: vec!["JavaScript".to_string()],
1438            code_blocks: true,    // Check code blocks
1439            html_elements: true,  // Check HTML elements
1440            html_comments: false, // Don't check HTML comments
1441        };
1442        let rule = MD044ProperNames::from_config_struct(config);
1443
1444        let content = r#"javascript here.
1445<!-- javascript in comment -->
1446More javascript."#;
1447
1448        let ctx = create_context(content);
1449        let fixed = rule.fix(&ctx).unwrap();
1450
1451        let expected = r#"JavaScript here.
1452<!-- javascript in comment -->
1453More JavaScript."#;
1454
1455        assert_eq!(
1456            fixed, expected,
1457            "Should not fix names inside HTML comments when disabled"
1458        );
1459    }
1460
1461    #[test]
1462    fn test_proper_names_in_link_text_are_flagged() {
1463        let rule = MD044ProperNames::new(
1464            vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1465            true,
1466        );
1467
1468        let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1469
1470Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1471
1472Real javascript should be flagged.
1473
1474Also see the [typescript guide][ts-ref] for more.
1475
1476Real python should be flagged too.
1477
1478[ts-ref]: https://typescript.org/handbook"#;
1479
1480        let ctx = create_context(content);
1481        let result = rule.check(&ctx).unwrap();
1482
1483        // Link text should be checked, URLs should not be checked
1484        // Line 1: [javascript documentation] - "javascript" should be flagged
1485        // Line 3: [node.js homepage] - "node.js" should be flagged (matches "Node.js")
1486        // Line 3: [python tutorial] - "python" should be flagged
1487        // Line 5: standalone javascript
1488        // Line 9: standalone python
1489        assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1490
1491        // Verify line numbers for link text warnings
1492        let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1493        assert_eq!(line_1_warnings.len(), 1);
1494        assert!(
1495            line_1_warnings[0]
1496                .message
1497                .contains("'javascript' should be 'JavaScript'")
1498        );
1499
1500        let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1501        assert_eq!(line_3_warnings.len(), 2); // node.js and python
1502
1503        // Standalone warnings
1504        assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1505        assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1506    }
1507
1508    #[test]
1509    fn test_link_urls_not_flagged() {
1510        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1511
1512        // URL contains "javascript" but should NOT be flagged
1513        let content = r#"[Link Text](https://javascript.info/guide)"#;
1514
1515        let ctx = create_context(content);
1516        let result = rule.check(&ctx).unwrap();
1517
1518        // URL should not be checked
1519        assert!(result.is_empty(), "URLs should not be checked for proper names");
1520    }
1521
1522    #[test]
1523    fn test_proper_names_in_image_alt_text_are_flagged() {
1524        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1525
1526        let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.
1527
1528Real javascript should be flagged."#;
1529
1530        let ctx = create_context(content);
1531        let result = rule.check(&ctx).unwrap();
1532
1533        // Image alt text should be checked, URL and title should not be checked
1534        // Line 1: ![javascript logo] - "javascript" should be flagged
1535        // Line 3: standalone javascript
1536        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1537        assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1538        assert!(result[0].line == 1); // "![javascript logo]"
1539        assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1540        assert!(result[1].line == 3); // "Real javascript should be flagged."
1541    }
1542
1543    #[test]
1544    fn test_image_urls_not_flagged() {
1545        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1546
1547        // URL contains "javascript" but should NOT be flagged
1548        let content = r#"![Logo](https://javascript.info/logo.png)"#;
1549
1550        let ctx = create_context(content);
1551        let result = rule.check(&ctx).unwrap();
1552
1553        // Image URL should not be checked
1554        assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1555    }
1556
1557    #[test]
1558    fn test_reference_link_text_flagged_but_definition_not() {
1559        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1560
1561        let content = r#"Check the [javascript guide][js-ref] for details.
1562
1563Real javascript should be flagged.
1564
1565[js-ref]: https://javascript.info/typescript/guide"#;
1566
1567        let ctx = create_context(content);
1568        let result = rule.check(&ctx).unwrap();
1569
1570        // Link text should be checked, reference definitions should not
1571        // Line 1: [javascript guide] - should be flagged
1572        // Line 3: standalone javascript - should be flagged
1573        // Line 5: reference definition - should NOT be flagged
1574        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1575        assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1576        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1577    }
1578
1579    #[test]
1580    fn test_reference_definitions_not_flagged() {
1581        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1582
1583        // Reference definition should NOT be flagged
1584        let content = r#"[js-ref]: https://javascript.info/guide"#;
1585
1586        let ctx = create_context(content);
1587        let result = rule.check(&ctx).unwrap();
1588
1589        // Reference definition URLs should not be checked
1590        assert!(result.is_empty(), "Reference definitions should not be checked");
1591    }
1592
1593    #[test]
1594    fn test_wikilinks_text_is_flagged() {
1595        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1596
1597        // WikiLinks [[destination]] should have their text checked
1598        let content = r#"[[javascript]]
1599
1600Regular javascript here.
1601
1602[[JavaScript|display text]]"#;
1603
1604        let ctx = create_context(content);
1605        let result = rule.check(&ctx).unwrap();
1606
1607        // Line 1: [[javascript]] - should be flagged (WikiLink text)
1608        // Line 3: standalone javascript - should be flagged
1609        // Line 5: [[JavaScript|display text]] - correct capitalization, no flag
1610        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1611        assert!(
1612            result
1613                .iter()
1614                .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1615        );
1616        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1617    }
1618
1619    #[test]
1620    fn test_url_link_text_not_flagged() {
1621        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1622
1623        // Link text that is itself a URL should not be flagged
1624        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1625
1626[http://github.com/org/repo](http://github.com/org/repo)
1627
1628[www.github.com/org/repo](https://www.github.com/org/repo)"#;
1629
1630        let ctx = create_context(content);
1631        let result = rule.check(&ctx).unwrap();
1632
1633        assert!(
1634            result.is_empty(),
1635            "URL-like link text should not be flagged, got: {result:?}"
1636        );
1637    }
1638
1639    #[test]
1640    fn test_url_link_text_with_leading_space_not_flagged() {
1641        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1642
1643        // Leading/trailing whitespace in link text should be trimmed before URL check
1644        let content = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
1645
1646        let ctx = create_context(content);
1647        let result = rule.check(&ctx).unwrap();
1648
1649        assert!(
1650            result.is_empty(),
1651            "URL-like link text with leading space should not be flagged, got: {result:?}"
1652        );
1653    }
1654
1655    #[test]
1656    fn test_url_link_text_uppercase_scheme_not_flagged() {
1657        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1658
1659        let content = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
1660
1661        let ctx = create_context(content);
1662        let result = rule.check(&ctx).unwrap();
1663
1664        assert!(
1665            result.is_empty(),
1666            "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
1667        );
1668    }
1669
1670    #[test]
1671    fn test_non_url_link_text_still_flagged() {
1672        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1673
1674        // Link text that is NOT a URL should still be flagged
1675        let content = r#"[github.com/org/repo](https://github.com/org/repo)
1676
1677[Visit github](https://github.com/org/repo)
1678
1679[//github.com/org/repo](//github.com/org/repo)
1680
1681[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
1682
1683        let ctx = create_context(content);
1684        let result = rule.check(&ctx).unwrap();
1685
1686        assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
1687        assert!(result.iter().any(|w| w.line == 1)); // github.com (no protocol)
1688        assert!(result.iter().any(|w| w.line == 3)); // Visit github
1689        assert!(result.iter().any(|w| w.line == 5)); // //github.com (protocol-relative)
1690        assert!(result.iter().any(|w| w.line == 7)); // ftp://github.com
1691    }
1692
1693    #[test]
1694    fn test_url_link_text_fix_not_applied() {
1695        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1696
1697        let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";
1698
1699        let ctx = create_context(content);
1700        let result = rule.fix(&ctx).unwrap();
1701
1702        assert_eq!(result, content, "Fix should not modify URL-like link text");
1703    }
1704
1705    #[test]
1706    fn test_mixed_url_and_regular_link_text() {
1707        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1708
1709        // Mix of URL link text (should skip) and regular text (should flag)
1710        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1711
1712Visit [github documentation](https://github.com/docs) for details.
1713
1714[www.github.com/pricing](https://www.github.com/pricing)"#;
1715
1716        let ctx = create_context(content);
1717        let result = rule.check(&ctx).unwrap();
1718
1719        // Only line 3 should be flagged ("github documentation" is not a URL)
1720        assert_eq!(
1721            result.len(),
1722            1,
1723            "Only non-URL link text should be flagged, got: {result:?}"
1724        );
1725        assert_eq!(result[0].line, 3);
1726    }
1727
1728    #[test]
1729    fn test_html_attribute_values_not_flagged() {
1730        // Matches inside HTML tag attributes (between `<` and `>`) are not flagged.
1731        // Attribute values are not prose — they hold URLs, class names, data values, etc.
1732        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1733        let content = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
1734        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1735        let result = rule.check(&ctx).unwrap();
1736
1737        // Nothing on line 5 should be flagged — everything is inside the `<img ...>` tag
1738        let line5_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
1739        assert!(
1740            line5_violations.is_empty(),
1741            "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
1742        );
1743
1744        // Plain text on line 3 is still flagged
1745        let line3_violations: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1746        assert_eq!(line3_violations.len(), 1, "Plain 'test' on line 3 should be flagged");
1747    }
1748
1749    #[test]
1750    fn test_html_text_content_still_flagged() {
1751        // Text between HTML tags (not inside `<...>`) is still checked.
1752        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1753        let content = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
1754        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1755        let result = rule.check(&ctx).unwrap();
1756
1757        // "example.test" in the href attribute → not flagged (inside `<...>`)
1758        // "test link" in the anchor text → flagged (between `>` and `<`)
1759        assert_eq!(
1760            result.len(),
1761            1,
1762            "Should flag only 'test' in anchor text, not in href: {result:?}"
1763        );
1764        assert_eq!(result[0].column, 37, "Should flag col 37 ('test link' in anchor text)");
1765    }
1766
1767    #[test]
1768    fn test_html_attribute_various_not_flagged() {
1769        // All attribute types are ignored: src, href, alt, class, data-*, title, etc.
1770        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1771        let content = concat!(
1772            "# Heading\n\n",
1773            "<img src=\"test.png\" alt=\"test image\">\n",
1774            "<span class=\"test-class\" data-test=\"value\">test content</span>\n",
1775        );
1776        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1777        let result = rule.check(&ctx).unwrap();
1778
1779        // Only "test content" (between tags on line 4) should be flagged
1780        assert_eq!(
1781            result.len(),
1782            1,
1783            "Should flag only 'test content' between tags: {result:?}"
1784        );
1785        assert_eq!(result[0].line, 4);
1786    }
1787
1788    #[test]
1789    fn test_plain_text_underscore_boundary_unchanged() {
1790        // Plain text (outside HTML tags) still uses original word boundary semantics where
1791        // underscore is a boundary character, matching markdownlint's behavior via AST splitting.
1792        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1793        let content = "# Heading\n\ntest_image is here and just_test ends here\n";
1794        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1795        let result = rule.check(&ctx).unwrap();
1796
1797        // Both "test_image" (test at start) and "just_test" (test at end) are flagged
1798        // because in plain text, "_" is a word boundary
1799        assert_eq!(
1800            result.len(),
1801            2,
1802            "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
1803        );
1804        let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
1805        assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
1806        assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
1807    }
1808
1809    #[test]
1810    fn test_frontmatter_yaml_keys_not_flagged() {
1811        // YAML keys in frontmatter should NOT be checked for proper name violations.
1812        // Only values should be checked.
1813        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1814
1815        let content = "---\ntitle: Heading\ntest: Some Test value\n---\n\nTest\n";
1816        let ctx = create_context(content);
1817        let result = rule.check(&ctx).unwrap();
1818
1819        // "test" in the YAML key (line 3) should NOT be flagged
1820        // "Test" in the YAML value (line 3) is correct capitalization, no flag
1821        // "Test" in body (line 6) is correct capitalization, no flag
1822        assert!(
1823            result.is_empty(),
1824            "Should not flag YAML keys or correctly capitalized values: {result:?}"
1825        );
1826    }
1827
1828    #[test]
1829    fn test_frontmatter_yaml_values_flagged() {
1830        // Incorrectly capitalized names in YAML values should be flagged.
1831        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1832
1833        let content = "---\ntitle: Heading\nkey: a test value\n---\n\nTest\n";
1834        let ctx = create_context(content);
1835        let result = rule.check(&ctx).unwrap();
1836
1837        // "test" in the YAML value (line 3) SHOULD be flagged
1838        assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");
1839        assert_eq!(result[0].line, 3);
1840        assert_eq!(result[0].column, 8); // "key: a " = 7 chars, then "test" at column 8
1841    }
1842
1843    #[test]
1844    fn test_frontmatter_key_matches_name_not_flagged() {
1845        // A YAML key that happens to match a configured name should NOT be flagged.
1846        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1847
1848        let content = "---\ntest: other value\n---\n\nBody text\n";
1849        let ctx = create_context(content);
1850        let result = rule.check(&ctx).unwrap();
1851
1852        assert!(
1853            result.is_empty(),
1854            "Should not flag YAML key that matches configured name: {result:?}"
1855        );
1856    }
1857
1858    #[test]
1859    fn test_frontmatter_empty_value_not_flagged() {
1860        // YAML key with no value should be skipped entirely.
1861        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1862
1863        let content = "---\ntest:\ntest: \n---\n\nBody text\n";
1864        let ctx = create_context(content);
1865        let result = rule.check(&ctx).unwrap();
1866
1867        assert!(
1868            result.is_empty(),
1869            "Should not flag YAML keys with empty values: {result:?}"
1870        );
1871    }
1872
1873    #[test]
1874    fn test_frontmatter_nested_yaml_key_not_flagged() {
1875        // Nested/indented YAML keys should also be skipped.
1876        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1877
1878        let content = "---\nparent:\n  test: nested value\n---\n\nBody text\n";
1879        let ctx = create_context(content);
1880        let result = rule.check(&ctx).unwrap();
1881
1882        // "test" as a nested key should NOT be flagged
1883        assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
1884    }
1885
1886    #[test]
1887    fn test_frontmatter_list_items_checked() {
1888        // YAML list items are values and should be checked for proper names.
1889        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1890
1891        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
1892        let ctx = create_context(content);
1893        let result = rule.check(&ctx).unwrap();
1894
1895        // "test" as a list item value SHOULD be flagged
1896        assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
1897        assert_eq!(result[0].line, 3);
1898    }
1899
1900    #[test]
1901    fn test_frontmatter_value_with_multiple_colons() {
1902        // For "key: value: more", key is before first colon.
1903        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1904
1905        let content = "---\ntest: description: a test thing\n---\n\nBody text\n";
1906        let ctx = create_context(content);
1907        let result = rule.check(&ctx).unwrap();
1908
1909        // "test" as key should NOT be flagged
1910        // "test" in value portion ("description: a test thing") SHOULD be flagged
1911        assert_eq!(
1912            result.len(),
1913            1,
1914            "Should flag 'test' in value after first colon: {result:?}"
1915        );
1916        assert_eq!(result[0].line, 2);
1917        assert!(result[0].column > 6, "Violation column should be in value portion");
1918    }
1919
1920    #[test]
1921    fn test_frontmatter_does_not_affect_body() {
1922        // Body text after frontmatter should still be fully checked.
1923        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1924
1925        let content = "---\ntitle: Heading\n---\n\ntest should be flagged here\n";
1926        let ctx = create_context(content);
1927        let result = rule.check(&ctx).unwrap();
1928
1929        assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
1930        assert_eq!(result[0].line, 5);
1931    }
1932
1933    #[test]
1934    fn test_frontmatter_fix_corrects_values_preserves_keys() {
1935        // Fix should correct YAML values but preserve keys.
1936        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1937
1938        let content = "---\ntest: a test value\n---\n\ntest here\n";
1939        let ctx = create_context(content);
1940        let fixed = rule.fix(&ctx).unwrap();
1941
1942        // Key "test" should remain lowercase; value "test" should become "Test"
1943        assert_eq!(fixed, "---\ntest: a Test value\n---\n\nTest here\n");
1944    }
1945
1946    #[test]
1947    fn test_frontmatter_multiword_value_flagged() {
1948        // Multiple proper names in a single YAML value should all be flagged.
1949        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1950
1951        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
1952        let ctx = create_context(content);
1953        let result = rule.check(&ctx).unwrap();
1954
1955        assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
1956        assert!(result.iter().all(|w| w.line == 2));
1957    }
1958
1959    #[test]
1960    fn test_frontmatter_yaml_comments_not_checked() {
1961        // YAML comments inside frontmatter should be skipped entirely.
1962        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1963
1964        let content = "---\n# test comment\ntitle: Heading\n---\n\nBody text\n";
1965        let ctx = create_context(content);
1966        let result = rule.check(&ctx).unwrap();
1967
1968        assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
1969    }
1970
1971    #[test]
1972    fn test_frontmatter_delimiters_not_checked() {
1973        // Frontmatter delimiter lines (--- or +++) should never be checked.
1974        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1975
1976        let content = "---\ntitle: Heading\n---\n\ntest here\n";
1977        let ctx = create_context(content);
1978        let result = rule.check(&ctx).unwrap();
1979
1980        // Only the body "test" on line 5 should be flagged
1981        assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
1982        assert_eq!(result[0].line, 5);
1983    }
1984
1985    #[test]
1986    fn test_frontmatter_continuation_lines_checked() {
1987        // Continuation lines (indented, no colon) are value content and should be checked.
1988        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1989
1990        let content = "---\ndescription: >\n  a test value\n  continued here\n---\n\nBody\n";
1991        let ctx = create_context(content);
1992        let result = rule.check(&ctx).unwrap();
1993
1994        // "test" on the continuation line should be flagged
1995        assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
1996        assert_eq!(result[0].line, 3);
1997    }
1998
1999    #[test]
2000    fn test_frontmatter_quoted_values_checked() {
2001        // Quoted YAML values should have their content checked (inside the quotes).
2002        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2003
2004        let content = "---\ntitle: \"a test title\"\n---\n\nBody\n";
2005        let ctx = create_context(content);
2006        let result = rule.check(&ctx).unwrap();
2007
2008        assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
2009        assert_eq!(result[0].line, 2);
2010    }
2011
2012    #[test]
2013    fn test_frontmatter_single_quoted_values_checked() {
2014        // Single-quoted YAML values should have their content checked.
2015        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2016
2017        let content = "---\ntitle: 'a test title'\n---\n\nBody\n";
2018        let ctx = create_context(content);
2019        let result = rule.check(&ctx).unwrap();
2020
2021        assert_eq!(
2022            result.len(),
2023            1,
2024            "Should flag 'test' in single-quoted YAML value: {result:?}"
2025        );
2026        assert_eq!(result[0].line, 2);
2027    }
2028
2029    #[test]
2030    fn test_frontmatter_fix_multiword_values() {
2031        // Fix should correct all proper names in frontmatter values.
2032        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
2033
2034        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
2035        let ctx = create_context(content);
2036        let fixed = rule.fix(&ctx).unwrap();
2037
2038        assert_eq!(
2039            fixed,
2040            "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n"
2041        );
2042    }
2043
2044    #[test]
2045    fn test_frontmatter_fix_preserves_yaml_structure() {
2046        // Fix should preserve YAML structure while correcting values.
2047        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2048
2049        let content = "---\ntags:\n  - test\n  - other\ntitle: a test doc\n---\n\ntest body\n";
2050        let ctx = create_context(content);
2051        let fixed = rule.fix(&ctx).unwrap();
2052
2053        assert_eq!(
2054            fixed,
2055            "---\ntags:\n  - Test\n  - other\ntitle: a Test doc\n---\n\nTest body\n"
2056        );
2057    }
2058
2059    #[test]
2060    fn test_frontmatter_toml_delimiters_not_checked() {
2061        // TOML frontmatter with +++ delimiters should also be handled.
2062        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2063
2064        let content = "+++\ntitle = \"a test title\"\n+++\n\ntest body\n";
2065        let ctx = create_context(content);
2066        let result = rule.check(&ctx).unwrap();
2067
2068        // "title" as TOML key should NOT be flagged
2069        // "test" in TOML quoted value SHOULD be flagged (line 2)
2070        // "test" in body SHOULD be flagged (line 5)
2071        assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");
2072        let fm_violations: Vec<_> = result.iter().filter(|w| w.line == 2).collect();
2073        assert_eq!(fm_violations.len(), 1, "Should flag 'test' in TOML value: {result:?}");
2074        let body_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
2075        assert_eq!(body_violations.len(), 1, "Should flag body 'test': {result:?}");
2076    }
2077
2078    #[test]
2079    fn test_frontmatter_toml_key_not_flagged() {
2080        // TOML keys should NOT be flagged, only values.
2081        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2082
2083        let content = "+++\ntest = \"other value\"\n+++\n\nBody text\n";
2084        let ctx = create_context(content);
2085        let result = rule.check(&ctx).unwrap();
2086
2087        assert!(
2088            result.is_empty(),
2089            "Should not flag TOML key that matches configured name: {result:?}"
2090        );
2091    }
2092
2093    #[test]
2094    fn test_frontmatter_toml_fix_preserves_keys() {
2095        // Fix should correct TOML values but preserve keys.
2096        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2097
2098        let content = "+++\ntest = \"a test value\"\n+++\n\ntest here\n";
2099        let ctx = create_context(content);
2100        let fixed = rule.fix(&ctx).unwrap();
2101
2102        // Key "test" should remain lowercase; value "test" should become "Test"
2103        assert_eq!(fixed, "+++\ntest = \"a Test value\"\n+++\n\nTest here\n");
2104    }
2105
2106    #[test]
2107    fn test_frontmatter_list_item_mapping_key_not_flagged() {
2108        // In "- test: nested value", "test" is a YAML key within a list-item mapping.
2109        // The key should NOT be flagged; only the value should be checked.
2110        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2111
2112        let content = "---\nitems:\n  - test: nested value\n---\n\nBody text\n";
2113        let ctx = create_context(content);
2114        let result = rule.check(&ctx).unwrap();
2115
2116        assert!(
2117            result.is_empty(),
2118            "Should not flag YAML key in list-item mapping: {result:?}"
2119        );
2120    }
2121
2122    #[test]
2123    fn test_frontmatter_list_item_mapping_value_flagged() {
2124        // In "- key: test value", the value portion should be checked.
2125        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2126
2127        let content = "---\nitems:\n  - key: a test value\n---\n\nBody text\n";
2128        let ctx = create_context(content);
2129        let result = rule.check(&ctx).unwrap();
2130
2131        assert_eq!(
2132            result.len(),
2133            1,
2134            "Should flag 'test' in list-item mapping value: {result:?}"
2135        );
2136        assert_eq!(result[0].line, 3);
2137    }
2138
2139    #[test]
2140    fn test_frontmatter_bare_list_item_still_flagged() {
2141        // Bare list items without a colon (e.g., "- test") are values and should be flagged.
2142        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2143
2144        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
2145        let ctx = create_context(content);
2146        let result = rule.check(&ctx).unwrap();
2147
2148        assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
2149        assert_eq!(result[0].line, 3);
2150    }
2151
2152    #[test]
2153    fn test_frontmatter_flow_mapping_not_flagged() {
2154        // Flow mappings like {test: value} contain YAML keys that should not be flagged.
2155        // The entire flow construct should be skipped.
2156        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2157
2158        let content = "---\nflow_map: {test: value, other: test}\n---\n\nBody text\n";
2159        let ctx = create_context(content);
2160        let result = rule.check(&ctx).unwrap();
2161
2162        assert!(
2163            result.is_empty(),
2164            "Should not flag names inside flow mappings: {result:?}"
2165        );
2166    }
2167
2168    #[test]
2169    fn test_frontmatter_flow_sequence_not_flagged() {
2170        // Flow sequences like [test, other] should also be skipped.
2171        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2172
2173        let content = "---\nitems: [test, other, test]\n---\n\nBody text\n";
2174        let ctx = create_context(content);
2175        let result = rule.check(&ctx).unwrap();
2176
2177        assert!(
2178            result.is_empty(),
2179            "Should not flag names inside flow sequences: {result:?}"
2180        );
2181    }
2182
2183    #[test]
2184    fn test_frontmatter_list_item_mapping_fix_preserves_key() {
2185        // Fix should correct values in list-item mappings but preserve keys.
2186        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2187
2188        let content = "---\nitems:\n  - test: a test value\n---\n\ntest here\n";
2189        let ctx = create_context(content);
2190        let fixed = rule.fix(&ctx).unwrap();
2191
2192        // "test" as list-item key should remain lowercase;
2193        // "test" in value portion should become "Test"
2194        assert_eq!(fixed, "---\nitems:\n  - test: a Test value\n---\n\nTest here\n");
2195    }
2196
2197    // --- Angle-bracket URL tests (issue #457) ---
2198
2199    #[test]
2200    fn test_angle_bracket_url_in_html_comment_not_flagged() {
2201        // Angle-bracket URLs inside HTML comments should be skipped
2202        let config = MD044Config {
2203            names: vec!["Test".to_string()],
2204            ..MD044Config::default()
2205        };
2206        let rule = MD044ProperNames::from_config_struct(config);
2207
2208        let content = "---\ntitle: Level 1 heading\n---\n\n<https://www.example.test>\n\n<!-- This is a Test https://www.example.test -->\n<!-- This is a Test <https://www.example.test> -->\n";
2209        let ctx = create_context(content);
2210        let result = rule.check(&ctx).unwrap();
2211
2212        // Line 7: "Test" in comment prose before bare URL -- already correct capitalization
2213        // Line 7: "test" in bare URL (not in angle brackets) -- but "test" is in URL domain, not prose.
2214        //   However, .example.test has "test" at a word boundary (after '.'), so it IS flagged.
2215        // Line 8: "Test" in comment prose -- correct capitalization, not flagged
2216        // Line 8: "test" in <https://www.example.test> -- inside angle-bracket URL, NOT flagged
2217
2218        // The key assertion: line 8's angle-bracket URL should NOT produce a warning
2219        let line8_warnings: Vec<_> = result.iter().filter(|w| w.line == 8).collect();
2220        assert!(
2221            line8_warnings.is_empty(),
2222            "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
2223        );
2224    }
2225
2226    #[test]
2227    fn test_bare_url_in_html_comment_still_flagged() {
2228        // Bare URLs (not in angle brackets) inside HTML comments should still be checked
2229        let config = MD044Config {
2230            names: vec!["Test".to_string()],
2231            ..MD044Config::default()
2232        };
2233        let rule = MD044ProperNames::from_config_struct(config);
2234
2235        let content = "<!-- This is a test https://www.example.test -->\n";
2236        let ctx = create_context(content);
2237        let result = rule.check(&ctx).unwrap();
2238
2239        // "test" appears as prose text before URL and also in the bare URL domain
2240        // At minimum, the prose "test" should be flagged
2241        assert!(
2242            !result.is_empty(),
2243            "Should flag 'test' in prose text of HTML comment with bare URL"
2244        );
2245    }
2246
2247    #[test]
2248    fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
2249        // Angle-bracket URLs in regular markdown are already handled by the link parser,
2250        // but the angle-bracket check provides a safety net
2251        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2252
2253        let content = "<https://www.example.test>\n";
2254        let ctx = create_context(content);
2255        let result = rule.check(&ctx).unwrap();
2256
2257        assert!(
2258            result.is_empty(),
2259            "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
2260        );
2261    }
2262
2263    #[test]
2264    fn test_multiple_angle_bracket_urls_in_one_comment() {
2265        let config = MD044Config {
2266            names: vec!["Test".to_string()],
2267            ..MD044Config::default()
2268        };
2269        let rule = MD044ProperNames::from_config_struct(config);
2270
2271        let content = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
2272        let ctx = create_context(content);
2273        let result = rule.check(&ctx).unwrap();
2274
2275        // Both URLs are inside angle brackets, so "test" inside them should NOT be flagged
2276        assert!(
2277            result.is_empty(),
2278            "Should not flag names inside multiple angle-bracket URLs: {result:?}"
2279        );
2280    }
2281
2282    #[test]
2283    fn test_angle_bracket_non_url_still_flagged() {
2284        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2285        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2286        assert!(
2287            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2288            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2289        );
2290    }
2291
2292    #[test]
2293    fn test_angle_bracket_mailto_url_not_flagged() {
2294        let config = MD044Config {
2295            names: vec!["Test".to_string()],
2296            ..MD044Config::default()
2297        };
2298        let rule = MD044ProperNames::from_config_struct(config);
2299
2300        let content = "<!-- Contact <mailto:test@example.com> for help -->\n";
2301        let ctx = create_context(content);
2302        let result = rule.check(&ctx).unwrap();
2303
2304        assert!(
2305            result.is_empty(),
2306            "Should not flag names inside angle-bracket mailto URLs: {result:?}"
2307        );
2308    }
2309
2310    #[test]
2311    fn test_angle_bracket_ftp_url_not_flagged() {
2312        let config = MD044Config {
2313            names: vec!["Test".to_string()],
2314            ..MD044Config::default()
2315        };
2316        let rule = MD044ProperNames::from_config_struct(config);
2317
2318        let content = "<!-- Download from <ftp://test.example.com/file> -->\n";
2319        let ctx = create_context(content);
2320        let result = rule.check(&ctx).unwrap();
2321
2322        assert!(
2323            result.is_empty(),
2324            "Should not flag names inside angle-bracket FTP URLs: {result:?}"
2325        );
2326    }
2327
2328    #[test]
2329    fn test_angle_bracket_url_fix_preserves_url() {
2330        // Fix should not modify text inside angle-bracket URLs
2331        let config = MD044Config {
2332            names: vec!["Test".to_string()],
2333            ..MD044Config::default()
2334        };
2335        let rule = MD044ProperNames::from_config_struct(config);
2336
2337        let content = "<!-- test text <https://www.example.test> -->\n";
2338        let ctx = create_context(content);
2339        let fixed = rule.fix(&ctx).unwrap();
2340
2341        // "test" in prose should be fixed, URL should be preserved
2342        assert!(
2343            fixed.contains("<https://www.example.test>"),
2344            "Fix should preserve angle-bracket URLs: {fixed}"
2345        );
2346        assert!(
2347            fixed.contains("Test text"),
2348            "Fix should correct prose 'test' to 'Test': {fixed}"
2349        );
2350    }
2351
2352    #[test]
2353    fn test_is_in_angle_bracket_url_helper() {
2354        // Direct tests of the helper function
2355        let line = "text <https://example.test> more text";
2356
2357        // Inside the URL
2358        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 5)); // '<'
2359        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 6)); // 'h'
2360        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 15)); // middle of URL
2361        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 26)); // '>'
2362
2363        // Outside the URL
2364        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 0)); // 't' at start
2365        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 4)); // space before '<'
2366        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 27)); // space after '>'
2367
2368        // Non-URL angle brackets
2369        assert!(!MD044ProperNames::is_in_angle_bracket_url("<notaurl>", 1));
2370
2371        // mailto scheme
2372        assert!(MD044ProperNames::is_in_angle_bracket_url(
2373            "<mailto:test@example.com>",
2374            10
2375        ));
2376
2377        // ftp scheme
2378        assert!(MD044ProperNames::is_in_angle_bracket_url(
2379            "<ftp://test.example.com>",
2380            10
2381        ));
2382    }
2383
2384    #[test]
2385    fn test_is_in_angle_bracket_url_uppercase_scheme() {
2386        // RFC 3986: URI schemes are case-insensitive
2387        assert!(MD044ProperNames::is_in_angle_bracket_url(
2388            "<HTTPS://test.example.com>",
2389            10
2390        ));
2391        assert!(MD044ProperNames::is_in_angle_bracket_url(
2392            "<Http://test.example.com>",
2393            10
2394        ));
2395    }
2396
2397    #[test]
2398    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2399        // ssh scheme
2400        assert!(MD044ProperNames::is_in_angle_bracket_url(
2401            "<ssh://test@example.com>",
2402            10
2403        ));
2404        // file scheme
2405        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2406        // data scheme (no authority, just colon)
2407        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2408    }
2409
2410    #[test]
2411    fn test_is_in_angle_bracket_url_unclosed() {
2412        // Unclosed angle bracket should NOT match
2413        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2414            "<https://test.example.com",
2415            10
2416        ));
2417    }
2418
2419    #[test]
2420    fn test_vale_inline_config_comments_not_flagged() {
2421        let config = MD044Config {
2422            names: vec!["Vale".to_string(), "JavaScript".to_string()],
2423            ..MD044Config::default()
2424        };
2425        let rule = MD044ProperNames::from_config_struct(config);
2426
2427        let content = "\
2428<!-- vale off -->
2429Some javascript text here.
2430<!-- vale on -->
2431<!-- vale Style.Rule = NO -->
2432More javascript text.
2433<!-- vale Style.Rule = YES -->
2434<!-- vale JavaScript.Grammar = NO -->
2435";
2436        let ctx = create_context(content);
2437        let result = rule.check(&ctx).unwrap();
2438
2439        // Only the body text lines (2, 5) should be flagged for "javascript"
2440        assert_eq!(result.len(), 2, "Should only flag body lines, not Vale config comments");
2441        assert_eq!(result[0].line, 2);
2442        assert_eq!(result[1].line, 5);
2443    }
2444
2445    #[test]
2446    fn test_remark_lint_inline_config_comments_not_flagged() {
2447        let config = MD044Config {
2448            names: vec!["JavaScript".to_string()],
2449            ..MD044Config::default()
2450        };
2451        let rule = MD044ProperNames::from_config_struct(config);
2452
2453        let content = "\
2454<!-- lint disable remark-lint-some-rule -->
2455Some javascript text here.
2456<!-- lint enable remark-lint-some-rule -->
2457<!-- lint ignore remark-lint-some-rule -->
2458More javascript text.
2459";
2460        let ctx = create_context(content);
2461        let result = rule.check(&ctx).unwrap();
2462
2463        assert_eq!(
2464            result.len(),
2465            2,
2466            "Should only flag body lines, not remark-lint config comments"
2467        );
2468        assert_eq!(result[0].line, 2);
2469        assert_eq!(result[1].line, 5);
2470    }
2471
2472    #[test]
2473    fn test_fix_does_not_modify_vale_remark_lint_comments() {
2474        let config = MD044Config {
2475            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2476            ..MD044Config::default()
2477        };
2478        let rule = MD044ProperNames::from_config_struct(config);
2479
2480        let content = "\
2481<!-- vale off -->
2482Some javascript text.
2483<!-- vale on -->
2484<!-- lint disable remark-lint-some-rule -->
2485More javascript text.
2486<!-- lint enable remark-lint-some-rule -->
2487";
2488        let ctx = create_context(content);
2489        let fixed = rule.fix(&ctx).unwrap();
2490
2491        // Config directive lines must be preserved unchanged
2492        assert!(fixed.contains("<!-- vale off -->"));
2493        assert!(fixed.contains("<!-- vale on -->"));
2494        assert!(fixed.contains("<!-- lint disable remark-lint-some-rule -->"));
2495        assert!(fixed.contains("<!-- lint enable remark-lint-some-rule -->"));
2496        // Body text should be fixed
2497        assert!(fixed.contains("Some JavaScript text."));
2498        assert!(fixed.contains("More JavaScript text."));
2499    }
2500
2501    #[test]
2502    fn test_mixed_tool_directives_all_skipped() {
2503        let config = MD044Config {
2504            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2505            ..MD044Config::default()
2506        };
2507        let rule = MD044ProperNames::from_config_struct(config);
2508
2509        let content = "\
2510<!-- rumdl-disable MD044 -->
2511Some javascript text.
2512<!-- markdownlint-disable -->
2513More javascript text.
2514<!-- vale off -->
2515Even more javascript text.
2516<!-- lint disable some-rule -->
2517Final javascript text.
2518<!-- rumdl-enable MD044 -->
2519<!-- markdownlint-enable -->
2520<!-- vale on -->
2521<!-- lint enable some-rule -->
2522";
2523        let ctx = create_context(content);
2524        let result = rule.check(&ctx).unwrap();
2525
2526        // Only body text lines should be flagged (lines 2, 4, 6, 8)
2527        assert_eq!(
2528            result.len(),
2529            4,
2530            "Should only flag body lines, not any tool directive comments"
2531        );
2532        assert_eq!(result[0].line, 2);
2533        assert_eq!(result[1].line, 4);
2534        assert_eq!(result[2].line, 6);
2535        assert_eq!(result[3].line, 8);
2536    }
2537
2538    #[test]
2539    fn test_vale_remark_lint_edge_cases_not_matched() {
2540        let config = MD044Config {
2541            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2542            ..MD044Config::default()
2543        };
2544        let rule = MD044ProperNames::from_config_struct(config);
2545
2546        // These are regular HTML comments, NOT tool directives:
2547        // - "<!-- vale -->" is not a valid Vale directive (no action keyword)
2548        // - "<!-- vale is a tool -->" starts with "vale" but is prose, not a directive
2549        // - "<!-- valedictorian javascript -->" does not start with "<!-- vale "
2550        // - "<!-- linting javascript tips -->" does not start with "<!-- lint "
2551        // - "<!-- vale javascript -->" starts with "vale" but has no action keyword
2552        // - "<!-- lint your javascript code -->" starts with "lint" but has no action keyword
2553        let content = "\
2554<!-- vale -->
2555<!-- vale is a tool for writing -->
2556<!-- valedictorian javascript -->
2557<!-- linting javascript tips -->
2558<!-- vale javascript -->
2559<!-- lint your javascript code -->
2560";
2561        let ctx = create_context(content);
2562        let result = rule.check(&ctx).unwrap();
2563
2564        // Line 1: "<!-- vale -->" contains "vale" (wrong case for "Vale") -> flagged
2565        // Line 2: "<!-- vale is a tool for writing -->" contains "vale" -> flagged
2566        // Line 3: "<!-- valedictorian javascript -->" contains "javascript" -> flagged
2567        // Line 4: "<!-- linting javascript tips -->" contains "javascript" -> flagged
2568        // Line 5: "<!-- vale javascript -->" contains "vale" and "javascript" -> flagged for both
2569        // Line 6: "<!-- lint your javascript code -->" contains "javascript" -> flagged
2570        assert_eq!(
2571            result.len(),
2572            7,
2573            "Should flag proper names in non-directive HTML comments: got {result:?}"
2574        );
2575        assert_eq!(result[0].line, 1); // "vale" in <!-- vale -->
2576        assert_eq!(result[1].line, 2); // "vale" in <!-- vale is a tool -->
2577        assert_eq!(result[2].line, 3); // "javascript" in <!-- valedictorian javascript -->
2578        assert_eq!(result[3].line, 4); // "javascript" in <!-- linting javascript tips -->
2579        assert_eq!(result[4].line, 5); // "vale" in <!-- vale javascript -->
2580        assert_eq!(result[5].line, 5); // "javascript" in <!-- vale javascript -->
2581        assert_eq!(result[6].line, 6); // "javascript" in <!-- lint your javascript code -->
2582    }
2583
2584    #[test]
2585    fn test_vale_style_directives_skipped() {
2586        let config = MD044Config {
2587            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2588            ..MD044Config::default()
2589        };
2590        let rule = MD044ProperNames::from_config_struct(config);
2591
2592        // These ARE valid Vale directives and should be skipped:
2593        let content = "\
2594<!-- vale style = MyStyle -->
2595<!-- vale styles = Style1, Style2 -->
2596<!-- vale MyRule.Name = YES -->
2597<!-- vale MyRule.Name = NO -->
2598Some javascript text.
2599";
2600        let ctx = create_context(content);
2601        let result = rule.check(&ctx).unwrap();
2602
2603        // Only line 5 (body text) should be flagged
2604        assert_eq!(
2605            result.len(),
2606            1,
2607            "Should only flag body lines, not Vale style/rule directives: got {result:?}"
2608        );
2609        assert_eq!(result[0].line, 5);
2610    }
2611
2612    // --- is_in_backtick_code_in_line unit tests ---
2613
2614    #[test]
2615    fn test_backtick_code_single_backticks() {
2616        let line = "hello `world` bye";
2617        // 'w' is at index 7, inside the backtick span (content between backticks at 6 and 12)
2618        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 7));
2619        // 'h' at index 0 is outside
2620        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2621        // 'b' at index 14 is outside
2622        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 14));
2623    }
2624
2625    #[test]
2626    fn test_backtick_code_double_backticks() {
2627        let line = "a ``code`` b";
2628        // 'c' is at index 4, inside ``...``
2629        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2630        // 'a' at index 0 is outside
2631        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2632        // 'b' at index 11 is outside
2633        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 11));
2634    }
2635
2636    #[test]
2637    fn test_backtick_code_unclosed() {
2638        let line = "a `code b";
2639        // No closing backtick, so nothing is a code span
2640        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2641    }
2642
2643    #[test]
2644    fn test_backtick_code_mismatched_count() {
2645        // Single backtick opening, double backtick is not a match
2646        let line = "a `code`` b";
2647        // The single ` at index 2 doesn't match `` at index 7-8
2648        // So 'c' at index 3 is NOT in a code span
2649        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2650    }
2651
2652    #[test]
2653    fn test_backtick_code_multiple_spans() {
2654        let line = "`first` and `second`";
2655        // 'f' at index 1 (inside first span)
2656        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2657        // 'a' at index 8 (between spans)
2658        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 8));
2659        // 's' at index 13 (inside second span)
2660        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 13));
2661    }
2662
2663    #[test]
2664    fn test_backtick_code_on_backtick_boundary() {
2665        let line = "`code`";
2666        // Position 0 is the opening backtick itself, not inside the span
2667        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2668        // Position 5 is the closing backtick, not inside the span
2669        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 5));
2670        // Position 1-4 are inside the span
2671        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2672        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2673    }
2674}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs