rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_fancy_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
11type WarningPosition = (usize, usize, String); // (line, column, found_name)
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
67/// Check if a trimmed line is an inline config comment from a linting tool.
68/// Recognized tools: rumdl, markdownlint, Vale, and remark-lint.
69fn is_inline_config_comment(trimmed: &str) -> bool {
70    trimmed.starts_with("<!-- rumdl-")
71        || trimmed.starts_with("<!-- markdownlint-")
72        || trimmed.starts_with("<!-- vale off")
73        || trimmed.starts_with("<!-- vale on")
74        || (trimmed.starts_with("<!-- vale ") && trimmed.contains(" = "))
75        || trimmed.starts_with("<!-- vale style")
76        || trimmed.starts_with("<!-- lint disable ")
77        || trimmed.starts_with("<!-- lint enable ")
78        || trimmed.starts_with("<!-- lint ignore ")
79}
80
81#[derive(Clone)]
82pub struct MD044ProperNames {
83    config: MD044Config,
84    // Cache the combined regex pattern string
85    combined_pattern: Option<String>,
86    // Precomputed lowercase name variants for fast pre-checks
87    name_variants: Vec<String>,
88    // Cache for name violations by content hash
89    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
90}
91
92impl MD044ProperNames {
93    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
94        let config = MD044Config {
95            names,
96            code_blocks,
97            html_elements: true, // Default to checking HTML elements
98            html_comments: true, // Default to checking HTML comments
99        };
100        let combined_pattern = Self::create_combined_pattern(&config);
101        let name_variants = Self::build_name_variants(&config);
102        Self {
103            config,
104            combined_pattern,
105            name_variants,
106            content_cache: Arc::new(Mutex::new(HashMap::new())),
107        }
108    }
109
110    // Helper function for consistent ASCII normalization
111    fn ascii_normalize(s: &str) -> String {
112        s.replace(['é', 'è', 'ê', 'ë'], "e")
113            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
114            .replace(['ï', 'î', 'í', 'ì'], "i")
115            .replace(['ü', 'ú', 'ù', 'û'], "u")
116            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
117            .replace('ñ', "n")
118            .replace('ç', "c")
119    }
120
121    pub fn from_config_struct(config: MD044Config) -> Self {
122        let combined_pattern = Self::create_combined_pattern(&config);
123        let name_variants = Self::build_name_variants(&config);
124        Self {
125            config,
126            combined_pattern,
127            name_variants,
128            content_cache: Arc::new(Mutex::new(HashMap::new())),
129        }
130    }
131
132    // Create a combined regex pattern for all proper names
133    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
134        if config.names.is_empty() {
135            return None;
136        }
137
138        // Create patterns for all names and their variations
139        let mut patterns: Vec<String> = config
140            .names
141            .iter()
142            .flat_map(|name| {
143                let mut variations = vec![];
144                let lower_name = name.to_lowercase();
145
146                // Add the lowercase version
147                variations.push(escape_regex(&lower_name));
148
149                // Add version without dots
150                let lower_name_no_dots = lower_name.replace('.', "");
151                if lower_name != lower_name_no_dots {
152                    variations.push(escape_regex(&lower_name_no_dots));
153                }
154
155                // Add ASCII-normalized versions for common accented characters
156                let ascii_normalized = Self::ascii_normalize(&lower_name);
157
158                if ascii_normalized != lower_name {
159                    variations.push(escape_regex(&ascii_normalized));
160
161                    // Also add version without dots
162                    let ascii_no_dots = ascii_normalized.replace('.', "");
163                    if ascii_normalized != ascii_no_dots {
164                        variations.push(escape_regex(&ascii_no_dots));
165                    }
166                }
167
168                variations
169            })
170            .collect();
171
172        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
173        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
174
175        // Combine all patterns into a single regex with capture groups
176        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
177        Some(format!(r"(?i)({})", patterns.join("|")))
178    }
179
180    fn build_name_variants(config: &MD044Config) -> Vec<String> {
181        let mut variants = HashSet::new();
182        for name in &config.names {
183            let lower_name = name.to_lowercase();
184            variants.insert(lower_name.clone());
185
186            let lower_no_dots = lower_name.replace('.', "");
187            if lower_name != lower_no_dots {
188                variants.insert(lower_no_dots);
189            }
190
191            let ascii_normalized = Self::ascii_normalize(&lower_name);
192            if ascii_normalized != lower_name {
193                variants.insert(ascii_normalized.clone());
194
195                let ascii_no_dots = ascii_normalized.replace('.', "");
196                if ascii_normalized != ascii_no_dots {
197                    variants.insert(ascii_no_dots);
198                }
199            }
200        }
201
202        variants.into_iter().collect()
203    }
204
205    // Find all name violations in the content and return positions.
206    // `content_lower` is the pre-computed lowercase version of `content` to avoid redundant allocations.
207    fn find_name_violations(
208        &self,
209        content: &str,
210        ctx: &crate::lint_context::LintContext,
211        content_lower: &str,
212    ) -> Vec<WarningPosition> {
213        // Early return: if no names configured or content is empty
214        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
215            return Vec::new();
216        }
217
218        // Early return: quick check if any of the configured names might be in content
219        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
220
221        if !has_potential_matches {
222            return Vec::new();
223        }
224
225        // Check if we have cached results
226        let hash = fast_hash(content);
227        {
228            // Use a separate scope for borrowing to minimize lock time
229            if let Ok(cache) = self.content_cache.lock()
230                && let Some(cached) = cache.get(&hash)
231            {
232                return cached.clone();
233            }
234        }
235
236        let mut violations = Vec::new();
237
238        // Get the regex from global cache
239        let combined_regex = match &self.combined_pattern {
240            Some(pattern) => match get_cached_fancy_regex(pattern) {
241                Ok(regex) => regex,
242                Err(_) => return Vec::new(),
243            },
244            None => return Vec::new(),
245        };
246
247        // Use ctx.lines for better performance
248        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
249            let line_num = line_idx + 1;
250            let line = line_info.content(ctx.content);
251
252            // Skip code fence lines (```language or ~~~language)
253            let trimmed = line.trim_start();
254            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
255                continue;
256            }
257
258            // Skip if in code block (when code_blocks = false)
259            if !self.config.code_blocks && line_info.in_code_block {
260                continue;
261            }
262
263            // Skip if in HTML block (when html_elements = false)
264            if !self.config.html_elements && line_info.in_html_block {
265                continue;
266            }
267
268            // Skip HTML comments using pre-computed line flag
269            if !self.config.html_comments && line_info.in_html_comment {
270                continue;
271            }
272
273            // Skip JSX expressions and MDX comments (MDX flavor)
274            if line_info.in_jsx_expression || line_info.in_mdx_comment {
275                continue;
276            }
277
278            // Skip Obsidian comments (Obsidian flavor)
279            if line_info.in_obsidian_comment {
280                continue;
281            }
282
283            // For frontmatter lines, determine offset where checkable value content starts.
284            // YAML keys should not be checked against proper names - only values.
285            let fm_value_offset = if line_info.in_front_matter {
286                Self::frontmatter_value_offset(line)
287            } else {
288                0
289            };
290            if fm_value_offset == usize::MAX {
291                continue;
292            }
293
294            // Skip inline config comments (rumdl, markdownlint, Vale, remark-lint directives)
295            if is_inline_config_comment(trimmed) {
296                continue;
297            }
298
299            // Early return: skip lines that don't contain any potential matches
300            let line_lower = line.to_lowercase();
301            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
302
303            if !has_line_matches {
304                continue;
305            }
306
307            // Use the combined regex to find all matches in one pass
308            for cap_result in combined_regex.find_iter(line) {
309                match cap_result {
310                    Ok(cap) => {
311                        let found_name = &line[cap.start()..cap.end()];
312
313                        // Check word boundaries manually for Unicode support
314                        let start_pos = cap.start();
315                        let end_pos = cap.end();
316
317                        // Skip matches in the key portion of frontmatter lines
318                        if start_pos < fm_value_offset {
319                            continue;
320                        }
321
322                        // Skip matches inside HTML tag attributes (handles multi-line tags)
323                        let byte_pos = line_info.byte_offset + start_pos;
324                        if ctx.is_in_html_tag(byte_pos) {
325                            continue;
326                        }
327
328                        if !Self::is_at_word_boundary(line, start_pos, true)
329                            || !Self::is_at_word_boundary(line, end_pos, false)
330                        {
331                            continue; // Not at word boundary
332                        }
333
334                        // Skip if in inline code when code_blocks is false
335                        if !self.config.code_blocks {
336                            if ctx.is_in_code_block_or_span(byte_pos) {
337                                continue;
338                            }
339                            // pulldown-cmark doesn't parse markdown syntax inside HTML
340                            // comments, HTML blocks, or frontmatter, so backtick-wrapped
341                            // text isn't detected by is_in_code_block_or_span. Check directly.
342                            if (line_info.in_html_comment || line_info.in_html_block || line_info.in_front_matter)
343                                && Self::is_in_backtick_code_in_line(line, start_pos)
344                            {
345                                continue;
346                            }
347                        }
348
349                        // Skip if in link URL or reference definition
350                        if Self::is_in_link(ctx, byte_pos) {
351                            continue;
352                        }
353
354                        // Skip if inside an angle-bracket URL (e.g., <https://...>)
355                        // The link parser skips autolinks inside HTML comments,
356                        // so we detect them directly in the line text.
357                        if Self::is_in_angle_bracket_url(line, start_pos) {
358                            continue;
359                        }
360
361                        // Find which proper name this matches
362                        if let Some(proper_name) = self.get_proper_name_for(found_name) {
363                            // Only flag if it's not already correct
364                            if found_name != proper_name {
365                                violations.push((line_num, cap.start() + 1, found_name.to_string()));
366                            }
367                        }
368                    }
369                    Err(e) => {
370                        eprintln!("Regex execution error on line {line_num}: {e}");
371                    }
372                }
373            }
374        }
375
376        // Store in cache (ignore if mutex is poisoned)
377        if let Ok(mut cache) = self.content_cache.lock() {
378            cache.insert(hash, violations.clone());
379        }
380        violations
381    }
382
383    /// Check if a byte position is within a link URL (not link text)
384    ///
385    /// Link text should be checked for proper names, but URLs should be skipped.
386    /// For `[text](url)` - check text, skip url
387    /// For `[text][ref]` - check text, skip reference portion
388    /// For `[[text]]` (WikiLinks) - check text, skip brackets
389    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
390        use pulldown_cmark::LinkType;
391
392        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
393        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
394        if link_idx > 0 {
395            let link = &ctx.links[link_idx - 1];
396            if byte_pos < link.byte_end {
397                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
398                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
399                    link.byte_offset + 2
400                } else {
401                    link.byte_offset + 1
402                };
403                let text_end = text_start + link.text.len();
404
405                // If position is within the text portion, skip only if text is a URL
406                if byte_pos >= text_start && byte_pos < text_end {
407                    return Self::link_text_is_url(&link.text);
408                }
409                // Position is in the URL/reference portion, skip it
410                return true;
411            }
412        }
413
414        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
415        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
416        if image_idx > 0 {
417            let image = &ctx.images[image_idx - 1];
418            if byte_pos < image.byte_end {
419                // Image starts with '![' so alt text starts at byte_offset + 2
420                let alt_start = image.byte_offset + 2;
421                let alt_end = alt_start + image.alt_text.len();
422
423                // If position is within the alt text portion, don't skip
424                if byte_pos >= alt_start && byte_pos < alt_end {
425                    return false;
426                }
427                // Position is in the URL/reference portion, skip it
428                return true;
429            }
430        }
431
432        // Check pre-computed reference definitions
433        ctx.is_in_reference_def(byte_pos)
434    }
435
436    /// Check if link text is a URL that should not have proper name corrections.
437    /// Matches markdownlint behavior: skip text starting with `http://`, `https://`, or `www.`.
438    fn link_text_is_url(text: &str) -> bool {
439        let lower = text.trim().to_ascii_lowercase();
440        lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
441    }
442
443    /// Check if a position within a line falls inside an angle-bracket URL (`<scheme://...>`).
444    ///
445    /// The link parser skips autolinks inside HTML comments, so `ctx.links` won't
446    /// contain them. This function detects angle-bracket URLs directly in the line
447    /// text, covering both HTML comments and regular text as a safety net.
448    fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
449        let bytes = line.as_bytes();
450        let len = bytes.len();
451        let mut i = 0;
452        while i < len {
453            if bytes[i] == b'<' {
454                let after_open = i + 1;
455                // Check for a valid URI scheme per CommonMark autolink spec:
456                // scheme = [a-zA-Z][a-zA-Z0-9+.-]{0,31}
457                // followed by ':'
458                if after_open < len && bytes[after_open].is_ascii_alphabetic() {
459                    let mut s = after_open + 1;
460                    let scheme_max = (after_open + 32).min(len);
461                    while s < scheme_max
462                        && (bytes[s].is_ascii_alphanumeric()
463                            || bytes[s] == b'+'
464                            || bytes[s] == b'-'
465                            || bytes[s] == b'.')
466                    {
467                        s += 1;
468                    }
469                    if s < len && bytes[s] == b':' {
470                        // Valid scheme found; scan for closing '>' with no spaces or '<'
471                        let mut j = s + 1;
472                        let mut found_close = false;
473                        while j < len {
474                            match bytes[j] {
475                                b'>' => {
476                                    found_close = true;
477                                    break;
478                                }
479                                b' ' | b'<' => break,
480                                _ => j += 1,
481                            }
482                        }
483                        if found_close && pos >= i && pos <= j {
484                            return true;
485                        }
486                        if found_close {
487                            i = j + 1;
488                            continue;
489                        }
490                    }
491                }
492            }
493            i += 1;
494        }
495        false
496    }
497
498    /// Check if a position within a line falls inside backtick-delimited code.
499    ///
500    /// pulldown-cmark does not parse markdown syntax inside HTML comments, so
501    /// `ctx.is_in_code_block_or_span` returns false for backtick-wrapped text
502    /// within comments. This function detects backtick code spans directly in
503    /// the line text following CommonMark rules: a code span starts with N
504    /// backticks and ends with exactly N backticks.
505    fn is_in_backtick_code_in_line(line: &str, pos: usize) -> bool {
506        let bytes = line.as_bytes();
507        let len = bytes.len();
508        let mut i = 0;
509        while i < len {
510            if bytes[i] == b'`' {
511                // Count the opening backtick sequence length
512                let open_start = i;
513                while i < len && bytes[i] == b'`' {
514                    i += 1;
515                }
516                let tick_len = i - open_start;
517
518                // Scan forward for a closing sequence of exactly tick_len backticks
519                while i < len {
520                    if bytes[i] == b'`' {
521                        let close_start = i;
522                        while i < len && bytes[i] == b'`' {
523                            i += 1;
524                        }
525                        if i - close_start == tick_len {
526                            // Matched pair found; the code span content is between
527                            // the end of the opening backticks and the start of the
528                            // closing backticks (exclusive of the backticks themselves).
529                            let content_start = open_start + tick_len;
530                            let content_end = close_start;
531                            if pos >= content_start && pos < content_end {
532                                return true;
533                            }
534                            // Continue scanning after this pair
535                            break;
536                        }
537                        // Not the right length; keep scanning
538                    } else {
539                        i += 1;
540                    }
541                }
542            } else {
543                i += 1;
544            }
545        }
546        false
547    }
548
549    // Check if a character is a word boundary (handles Unicode)
550    fn is_word_boundary_char(c: char) -> bool {
551        !c.is_alphanumeric()
552    }
553
554    // Check if position is at a word boundary using byte-level lookups.
555    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
556        if is_start {
557            if pos == 0 {
558                return true;
559            }
560            match content[..pos].chars().next_back() {
561                None => true,
562                Some(c) => Self::is_word_boundary_char(c),
563            }
564        } else {
565            if pos >= content.len() {
566                return true;
567            }
568            match content[pos..].chars().next() {
569                None => true,
570                Some(c) => Self::is_word_boundary_char(c),
571            }
572        }
573    }
574
575    /// For a frontmatter line, return the byte offset where the checkable
576    /// value portion starts. Returns `usize::MAX` if the entire line should be
577    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
578    fn frontmatter_value_offset(line: &str) -> usize {
579        let trimmed = line.trim();
580
581        // Skip frontmatter delimiters and empty lines
582        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
583            return usize::MAX;
584        }
585
586        // Skip YAML comments
587        if trimmed.starts_with('#') {
588            return usize::MAX;
589        }
590
591        // YAML list item: "  - item" or "  - key: value"
592        let stripped = line.trim_start();
593        if let Some(after_dash) = stripped.strip_prefix("- ") {
594            let leading = line.len() - stripped.len();
595            // Check if the list item contains a mapping (e.g., "- key: value")
596            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
597                return result;
598            }
599            // Bare list item value (no colon) - check content after "- "
600            return leading + 2;
601        }
602        if stripped == "-" {
603            return usize::MAX;
604        }
605
606        // Key-value pair with colon separator (YAML): "key: value"
607        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
608            return result;
609        }
610
611        // Key-value pair with equals separator (TOML): "key = value"
612        if let Some(eq_pos) = line.find('=') {
613            let after_eq = eq_pos + 1;
614            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
615                let value_start = after_eq + 1;
616                let value_slice = &line[value_start..];
617                let value_trimmed = value_slice.trim();
618                if value_trimmed.is_empty() {
619                    return usize::MAX;
620                }
621                // For quoted values, skip the opening quote character
622                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
623                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
624                {
625                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
626                    return value_start + quote_offset + 1;
627                }
628                return value_start;
629            }
630            // Equals with no space after or at end of line -> no value to check
631            return usize::MAX;
632        }
633
634        // No separator found - continuation line or bare value, check the whole line
635        0
636    }
637
638    /// Parse a key-value pair using colon separator within `content` that starts
639    /// at `base_offset` in the original line. Returns `Some(offset)` if a colon
640    /// separator is found, `None` if no colon is present.
641    fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
642        let colon_pos = content.find(':')?;
643        let abs_colon = base_offset + colon_pos;
644        let after_colon = abs_colon + 1;
645        if after_colon < line.len() && line.as_bytes()[after_colon] == b' ' {
646            let value_start = after_colon + 1;
647            let value_slice = &line[value_start..];
648            let value_trimmed = value_slice.trim();
649            if value_trimmed.is_empty() {
650                return Some(usize::MAX);
651            }
652            // Skip flow mappings and flow sequences - too complex for heuristic parsing
653            if value_trimmed.starts_with('{') || value_trimmed.starts_with('[') {
654                return Some(usize::MAX);
655            }
656            // For quoted values, skip the opening quote character
657            if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
658                || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
659            {
660                let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
661                return Some(value_start + quote_offset + 1);
662            }
663            return Some(value_start);
664        }
665        // Colon with no space after or at end of line -> no value to check
666        Some(usize::MAX)
667    }
668
669    // Get the proper name that should be used for a found name
670    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
671        let found_lower = found_name.to_lowercase();
672
673        // Iterate through the configured proper names
674        for name in &self.config.names {
675            let lower_name = name.to_lowercase();
676            let lower_name_no_dots = lower_name.replace('.', "");
677
678            // Direct match
679            if found_lower == lower_name || found_lower == lower_name_no_dots {
680                return Some(name.clone());
681            }
682
683            // Check ASCII-normalized version
684            let ascii_normalized = Self::ascii_normalize(&lower_name);
685
686            let ascii_no_dots = ascii_normalized.replace('.', "");
687
688            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
689                return Some(name.clone());
690            }
691        }
692        None
693    }
694}
695
696impl Rule for MD044ProperNames {
697    fn name(&self) -> &'static str {
698        "MD044"
699    }
700
701    fn description(&self) -> &'static str {
702        "Proper names should have the correct capitalization"
703    }
704
705    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
706        if self.config.names.is_empty() {
707            return true;
708        }
709        // Quick check if any configured name variants exist (case-insensitive)
710        let content_lower = if ctx.content.is_ascii() {
711            ctx.content.to_ascii_lowercase()
712        } else {
713            ctx.content.to_lowercase()
714        };
715        !self.name_variants.iter().any(|name| content_lower.contains(name))
716    }
717
718    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
719        let content = ctx.content;
720        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
721            return Ok(Vec::new());
722        }
723
724        // Compute lowercase content once and reuse across all checks
725        let content_lower = if content.is_ascii() {
726            content.to_ascii_lowercase()
727        } else {
728            content.to_lowercase()
729        };
730
731        // Early return: use pre-computed name_variants for the quick check
732        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
733
734        if !has_potential_matches {
735            return Ok(Vec::new());
736        }
737
738        let line_index = &ctx.line_index;
739        let violations = self.find_name_violations(content, ctx, &content_lower);
740
741        let warnings = violations
742            .into_iter()
743            .filter_map(|(line, column, found_name)| {
744                self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
745                    rule_name: Some(self.name().to_string()),
746                    line,
747                    column,
748                    end_line: line,
749                    end_column: column + found_name.len(),
750                    message: format!("Proper name '{found_name}' should be '{proper_name}'"),
751                    severity: Severity::Warning,
752                    fix: Some(Fix {
753                        range: line_index.line_col_to_byte_range_with_length(line, column, found_name.len()),
754                        replacement: proper_name,
755                    }),
756                })
757            })
758            .collect();
759
760        Ok(warnings)
761    }
762
763    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
764        let content = ctx.content;
765        if content.is_empty() || self.config.names.is_empty() {
766            return Ok(content.to_string());
767        }
768
769        let content_lower = if content.is_ascii() {
770            content.to_ascii_lowercase()
771        } else {
772            content.to_lowercase()
773        };
774        let violations = self.find_name_violations(content, ctx, &content_lower);
775        if violations.is_empty() {
776            return Ok(content.to_string());
777        }
778
779        // Process lines and build the fixed content
780        let mut fixed_lines = Vec::new();
781
782        // Group violations by line
783        let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
784        for (line_num, col_num, found_name) in violations {
785            violations_by_line
786                .entry(line_num)
787                .or_default()
788                .push((col_num, found_name));
789        }
790
791        // Sort violations within each line in reverse order
792        for violations in violations_by_line.values_mut() {
793            violations.sort_by_key(|b| std::cmp::Reverse(b.0));
794        }
795
796        // Process each line
797        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
798            let line_num = line_idx + 1;
799
800            // Skip lines where this rule is disabled by inline config
801            if ctx.inline_config().is_rule_disabled(self.name(), line_num) {
802                fixed_lines.push(line_info.content(ctx.content).to_string());
803                continue;
804            }
805
806            if let Some(line_violations) = violations_by_line.get(&line_num) {
807                // This line has violations, fix them
808                let mut fixed_line = line_info.content(ctx.content).to_string();
809
810                for (col_num, found_name) in line_violations {
811                    if let Some(proper_name) = self.get_proper_name_for(found_name) {
812                        let start_col = col_num - 1; // Convert to 0-based
813                        let end_col = start_col + found_name.len();
814
815                        if end_col <= fixed_line.len()
816                            && fixed_line.is_char_boundary(start_col)
817                            && fixed_line.is_char_boundary(end_col)
818                        {
819                            fixed_line.replace_range(start_col..end_col, &proper_name);
820                        }
821                    }
822                }
823
824                fixed_lines.push(fixed_line);
825            } else {
826                // No violations on this line, keep it as is
827                fixed_lines.push(line_info.content(ctx.content).to_string());
828            }
829        }
830
831        // Join lines with newlines, preserving the original ending
832        let mut result = fixed_lines.join("\n");
833        if content.ends_with('\n') && !result.ends_with('\n') {
834            result.push('\n');
835        }
836        Ok(result)
837    }
838
839    fn as_any(&self) -> &dyn std::any::Any {
840        self
841    }
842
843    fn default_config_section(&self) -> Option<(String, toml::Value)> {
844        let json_value = serde_json::to_value(&self.config).ok()?;
845        Some((
846            self.name().to_string(),
847            crate::rule_config_serde::json_to_toml_value(&json_value)?,
848        ))
849    }
850
851    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
852    where
853        Self: Sized,
854    {
855        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
856        Box::new(Self::from_config_struct(rule_config))
857    }
858}
859
860#[cfg(test)]
861mod tests {
862    use super::*;
863    use crate::lint_context::LintContext;
864
865    fn create_context(content: &str) -> LintContext<'_> {
866        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
867    }
868
869    #[test]
870    fn test_correctly_capitalized_names() {
871        let rule = MD044ProperNames::new(
872            vec![
873                "JavaScript".to_string(),
874                "TypeScript".to_string(),
875                "Node.js".to_string(),
876            ],
877            true,
878        );
879
880        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
881        let ctx = create_context(content);
882        let result = rule.check(&ctx).unwrap();
883        assert!(result.is_empty(), "Should not flag correctly capitalized names");
884    }
885
886    #[test]
887    fn test_incorrectly_capitalized_names() {
888        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
889
890        let content = "This document uses javascript and typescript incorrectly.";
891        let ctx = create_context(content);
892        let result = rule.check(&ctx).unwrap();
893
894        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
895        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
896        assert_eq!(result[0].line, 1);
897        assert_eq!(result[0].column, 20);
898        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
899        assert_eq!(result[1].line, 1);
900        assert_eq!(result[1].column, 35);
901    }
902
903    #[test]
904    fn test_names_at_beginning_of_sentences() {
905        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
906
907        let content = "javascript is a great language. python is also popular.";
908        let ctx = create_context(content);
909        let result = rule.check(&ctx).unwrap();
910
911        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
912        assert_eq!(result[0].line, 1);
913        assert_eq!(result[0].column, 1);
914        assert_eq!(result[1].line, 1);
915        assert_eq!(result[1].column, 33);
916    }
917
918    #[test]
919    fn test_names_in_code_blocks_checked_by_default() {
920        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
921
922        let content = r#"Here is some text with JavaScript.
923
924```javascript
925// This javascript should be checked
926const lang = "javascript";
927```
928
929But this javascript should be flagged."#;
930
931        let ctx = create_context(content);
932        let result = rule.check(&ctx).unwrap();
933
934        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
935        assert_eq!(result[0].line, 4);
936        assert_eq!(result[1].line, 5);
937        assert_eq!(result[2].line, 8);
938    }
939
940    #[test]
941    fn test_names_in_code_blocks_ignored_when_disabled() {
942        let rule = MD044ProperNames::new(
943            vec!["JavaScript".to_string()],
944            false, // code_blocks = false means skip code blocks
945        );
946
947        let content = r#"```
948javascript in code block
949```"#;
950
951        let ctx = create_context(content);
952        let result = rule.check(&ctx).unwrap();
953
954        assert_eq!(
955            result.len(),
956            0,
957            "Should not flag javascript in code blocks when code_blocks is false"
958        );
959    }
960
961    #[test]
962    fn test_names_in_inline_code_checked_by_default() {
963        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
964
965        let content = "This is `javascript` in inline code and javascript outside.";
966        let ctx = create_context(content);
967        let result = rule.check(&ctx).unwrap();
968
969        // When code_blocks=true, inline code should be checked
970        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
971        assert_eq!(result[0].column, 10); // javascript in inline code
972        assert_eq!(result[1].column, 41); // javascript outside
973    }
974
975    #[test]
976    fn test_multiple_names_in_same_line() {
977        let rule = MD044ProperNames::new(
978            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
979            true,
980        );
981
982        let content = "I use javascript, typescript, and react in my projects.";
983        let ctx = create_context(content);
984        let result = rule.check(&ctx).unwrap();
985
986        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
987        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
988        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
989        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
990    }
991
992    #[test]
993    fn test_case_sensitivity() {
994        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
995
996        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
997        let ctx = create_context(content);
998        let result = rule.check(&ctx).unwrap();
999
1000        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
1001        // JavaScript (correct) should not be flagged
1002        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
1003    }
1004
1005    #[test]
1006    fn test_configuration_with_custom_name_list() {
1007        let config = MD044Config {
1008            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
1009            code_blocks: true,
1010            html_elements: true,
1011            html_comments: true,
1012        };
1013        let rule = MD044ProperNames::from_config_struct(config);
1014
1015        let content = "We use github, gitlab, and devops for our workflow.";
1016        let ctx = create_context(content);
1017        let result = rule.check(&ctx).unwrap();
1018
1019        assert_eq!(result.len(), 3, "Should flag all custom names");
1020        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
1021        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
1022        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
1023    }
1024
1025    #[test]
1026    fn test_empty_configuration() {
1027        let rule = MD044ProperNames::new(vec![], true);
1028
1029        let content = "This has javascript and typescript but no configured names.";
1030        let ctx = create_context(content);
1031        let result = rule.check(&ctx).unwrap();
1032
1033        assert!(result.is_empty(), "Should not flag anything with empty configuration");
1034    }
1035
1036    #[test]
1037    fn test_names_with_special_characters() {
1038        let rule = MD044ProperNames::new(
1039            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
1040            true,
1041        );
1042
1043        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
1044        let ctx = create_context(content);
1045        let result = rule.check(&ctx).unwrap();
1046
1047        // nodejs should match Node.js (dotless variation)
1048        // asp.net should be flagged (wrong case)
1049        // ASP.NET should not be flagged (correct)
1050        // c++ should be flagged
1051        assert_eq!(result.len(), 3, "Should handle special characters correctly");
1052
1053        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
1054        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
1055        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
1056        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
1057    }
1058
1059    #[test]
1060    fn test_word_boundaries() {
1061        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
1062
1063        let content = "JavaScript is not java or script, but Java and Script are separate.";
1064        let ctx = create_context(content);
1065        let result = rule.check(&ctx).unwrap();
1066
1067        // Should only flag lowercase "java" and "script" as separate words
1068        assert_eq!(result.len(), 2, "Should respect word boundaries");
1069        assert!(result.iter().any(|w| w.column == 19)); // "java" position
1070        assert!(result.iter().any(|w| w.column == 27)); // "script" position
1071    }
1072
1073    #[test]
1074    fn test_fix_method() {
1075        let rule = MD044ProperNames::new(
1076            vec![
1077                "JavaScript".to_string(),
1078                "TypeScript".to_string(),
1079                "Node.js".to_string(),
1080            ],
1081            true,
1082        );
1083
1084        let content = "I love javascript, typescript, and nodejs!";
1085        let ctx = create_context(content);
1086        let fixed = rule.fix(&ctx).unwrap();
1087
1088        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
1089    }
1090
1091    #[test]
1092    fn test_fix_multiple_occurrences() {
1093        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
1094
1095        let content = "python is great. I use python daily. PYTHON is powerful.";
1096        let ctx = create_context(content);
1097        let fixed = rule.fix(&ctx).unwrap();
1098
1099        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
1100    }
1101
1102    #[test]
1103    fn test_fix_checks_code_blocks_by_default() {
1104        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1105
1106        let content = r#"I love javascript.
1107
1108```
1109const lang = "javascript";
1110```
1111
1112More javascript here."#;
1113
1114        let ctx = create_context(content);
1115        let fixed = rule.fix(&ctx).unwrap();
1116
1117        let expected = r#"I love JavaScript.
1118
1119```
1120const lang = "JavaScript";
1121```
1122
1123More JavaScript here."#;
1124
1125        assert_eq!(fixed, expected);
1126    }
1127
1128    #[test]
1129    fn test_multiline_content() {
1130        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
1131
1132        let content = r#"First line with rust.
1133Second line with python.
1134Third line with RUST and PYTHON."#;
1135
1136        let ctx = create_context(content);
1137        let result = rule.check(&ctx).unwrap();
1138
1139        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
1140        assert_eq!(result[0].line, 1);
1141        assert_eq!(result[1].line, 2);
1142        assert_eq!(result[2].line, 3);
1143        assert_eq!(result[3].line, 3);
1144    }
1145
1146    #[test]
1147    fn test_default_config() {
1148        let config = MD044Config::default();
1149        assert!(config.names.is_empty());
1150        assert!(!config.code_blocks);
1151        assert!(config.html_elements);
1152        assert!(config.html_comments);
1153    }
1154
1155    #[test]
1156    fn test_default_config_checks_html_comments() {
1157        let config = MD044Config {
1158            names: vec!["JavaScript".to_string()],
1159            ..MD044Config::default()
1160        };
1161        let rule = MD044ProperNames::from_config_struct(config);
1162
1163        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1164        let ctx = create_context(content);
1165        let result = rule.check(&ctx).unwrap();
1166
1167        assert_eq!(result.len(), 1, "Default config should check HTML comments");
1168        assert_eq!(result[0].line, 3);
1169    }
1170
1171    #[test]
1172    fn test_default_config_skips_code_blocks() {
1173        let config = MD044Config {
1174            names: vec!["JavaScript".to_string()],
1175            ..MD044Config::default()
1176        };
1177        let rule = MD044ProperNames::from_config_struct(config);
1178
1179        let content = "# Guide\n\n```\njavascript in code\n```\n";
1180        let ctx = create_context(content);
1181        let result = rule.check(&ctx).unwrap();
1182
1183        assert_eq!(result.len(), 0, "Default config should skip code blocks");
1184    }
1185
1186    #[test]
1187    fn test_standalone_html_comment_checked() {
1188        let config = MD044Config {
1189            names: vec!["Test".to_string()],
1190            ..MD044Config::default()
1191        };
1192        let rule = MD044ProperNames::from_config_struct(config);
1193
1194        let content = "# Heading\n\n<!-- this is a test example -->\n";
1195        let ctx = create_context(content);
1196        let result = rule.check(&ctx).unwrap();
1197
1198        assert_eq!(result.len(), 1, "Should flag proper name in standalone HTML comment");
1199        assert_eq!(result[0].line, 3);
1200    }
1201
1202    #[test]
1203    fn test_inline_config_comments_not_flagged() {
1204        let config = MD044Config {
1205            names: vec!["RUMDL".to_string()],
1206            ..MD044Config::default()
1207        };
1208        let rule = MD044ProperNames::from_config_struct(config);
1209
1210        // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
1211        // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
1212        // but would be suppressed by the linting engine's inline config filtering.
1213        let content = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
1214        let ctx = create_context(content);
1215        let result = rule.check(&ctx).unwrap();
1216
1217        assert_eq!(result.len(), 2, "Should only flag body lines, not config comments");
1218        assert_eq!(result[0].line, 2);
1219        assert_eq!(result[1].line, 5);
1220    }
1221
1222    #[test]
1223    fn test_html_comment_skipped_when_disabled() {
1224        let config = MD044Config {
1225            names: vec!["Test".to_string()],
1226            code_blocks: true,
1227            html_elements: true,
1228            html_comments: false,
1229        };
1230        let rule = MD044ProperNames::from_config_struct(config);
1231
1232        let content = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
1233        let ctx = create_context(content);
1234        let result = rule.check(&ctx).unwrap();
1235
1236        assert_eq!(
1237            result.len(),
1238            1,
1239            "Should only flag 'test' outside HTML comment when html_comments=false"
1240        );
1241        assert_eq!(result[0].line, 5);
1242    }
1243
1244    #[test]
1245    fn test_fix_corrects_html_comment_content() {
1246        let config = MD044Config {
1247            names: vec!["JavaScript".to_string()],
1248            ..MD044Config::default()
1249        };
1250        let rule = MD044ProperNames::from_config_struct(config);
1251
1252        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1253        let ctx = create_context(content);
1254        let fixed = rule.fix(&ctx).unwrap();
1255
1256        assert_eq!(fixed, "# Guide\n\n<!-- JavaScript mentioned here -->\n");
1257    }
1258
1259    #[test]
1260    fn test_fix_does_not_modify_inline_config_comments() {
1261        let config = MD044Config {
1262            names: vec!["RUMDL".to_string()],
1263            ..MD044Config::default()
1264        };
1265        let rule = MD044ProperNames::from_config_struct(config);
1266
1267        let content = "<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n";
1268        let ctx = create_context(content);
1269        let fixed = rule.fix(&ctx).unwrap();
1270
1271        // Config comments should be untouched
1272        assert!(fixed.contains("<!-- rumdl-disable -->"));
1273        assert!(fixed.contains("<!-- rumdl-enable -->"));
1274        // Body text inside disable block should NOT be fixed (rule is disabled)
1275        assert!(
1276            fixed.contains("Some rumdl text."),
1277            "Line inside rumdl-disable block should not be modified by fix()"
1278        );
1279    }
1280
1281    #[test]
1282    fn test_fix_respects_inline_disable_partial() {
1283        let config = MD044Config {
1284            names: vec!["RUMDL".to_string()],
1285            ..MD044Config::default()
1286        };
1287        let rule = MD044ProperNames::from_config_struct(config);
1288
1289        let content =
1290            "<!-- rumdl-disable MD044 -->\nSome rumdl text.\n<!-- rumdl-enable MD044 -->\n\nSome rumdl text outside.\n";
1291        let ctx = create_context(content);
1292        let fixed = rule.fix(&ctx).unwrap();
1293
1294        // Line inside disable block should be preserved
1295        assert!(
1296            fixed.contains("Some rumdl text.\n<!-- rumdl-enable"),
1297            "Line inside disable block should not be modified"
1298        );
1299        // Line outside disable block should be fixed
1300        assert!(
1301            fixed.contains("Some RUMDL text outside."),
1302            "Line outside disable block should be fixed"
1303        );
1304    }
1305
1306    #[test]
1307    fn test_performance_with_many_names() {
1308        let mut names = vec![];
1309        for i in 0..50 {
1310            names.push(format!("ProperName{i}"));
1311        }
1312
1313        let rule = MD044ProperNames::new(names, true);
1314
1315        let content = "This has propername0, propername25, and propername49 incorrectly.";
1316        let ctx = create_context(content);
1317        let result = rule.check(&ctx).unwrap();
1318
1319        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
1320    }
1321
1322    #[test]
1323    fn test_large_name_count_performance() {
1324        // Verify MD044 can handle large numbers of names without regex limitations
1325        // This test confirms that fancy-regex handles large patterns well
1326        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
1327
1328        let rule = MD044ProperNames::new(names, true);
1329
1330        // The combined pattern should be created successfully
1331        assert!(rule.combined_pattern.is_some());
1332
1333        // Should be able to check content without errors
1334        let content = "This has propername0 and propername999 in it.";
1335        let ctx = create_context(content);
1336        let result = rule.check(&ctx).unwrap();
1337
1338        // Should detect both incorrect names
1339        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
1340    }
1341
1342    #[test]
1343    fn test_cache_behavior() {
1344        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1345
1346        let content = "Using javascript here.";
1347        let ctx = create_context(content);
1348
1349        // First check
1350        let result1 = rule.check(&ctx).unwrap();
1351        assert_eq!(result1.len(), 1);
1352
1353        // Second check should use cache
1354        let result2 = rule.check(&ctx).unwrap();
1355        assert_eq!(result2.len(), 1);
1356
1357        // Results should be identical
1358        assert_eq!(result1[0].line, result2[0].line);
1359        assert_eq!(result1[0].column, result2[0].column);
1360    }
1361
1362    #[test]
1363    fn test_html_comments_not_checked_when_disabled() {
1364        let config = MD044Config {
1365            names: vec!["JavaScript".to_string()],
1366            code_blocks: true,    // Check code blocks
1367            html_elements: true,  // Check HTML elements
1368            html_comments: false, // Don't check HTML comments
1369        };
1370        let rule = MD044ProperNames::from_config_struct(config);
1371
1372        let content = r#"Regular javascript here.
1373<!-- This javascript in HTML comment should be ignored -->
1374More javascript outside."#;
1375
1376        let ctx = create_context(content);
1377        let result = rule.check(&ctx).unwrap();
1378
1379        assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
1380        assert_eq!(result[0].line, 1);
1381        assert_eq!(result[1].line, 3);
1382    }
1383
1384    #[test]
1385    fn test_html_comments_checked_when_enabled() {
1386        let config = MD044Config {
1387            names: vec!["JavaScript".to_string()],
1388            code_blocks: true,   // Check code blocks
1389            html_elements: true, // Check HTML elements
1390            html_comments: true, // Check HTML comments
1391        };
1392        let rule = MD044ProperNames::from_config_struct(config);
1393
1394        let content = r#"Regular javascript here.
1395<!-- This javascript in HTML comment should be checked -->
1396More javascript outside."#;
1397
1398        let ctx = create_context(content);
1399        let result = rule.check(&ctx).unwrap();
1400
1401        assert_eq!(
1402            result.len(),
1403            3,
1404            "Should flag all javascript occurrences including in HTML comments"
1405        );
1406    }
1407
1408    #[test]
1409    fn test_multiline_html_comments() {
1410        let config = MD044Config {
1411            names: vec!["Python".to_string(), "JavaScript".to_string()],
1412            code_blocks: true,    // Check code blocks
1413            html_elements: true,  // Check HTML elements
1414            html_comments: false, // Don't check HTML comments
1415        };
1416        let rule = MD044ProperNames::from_config_struct(config);
1417
1418        let content = r#"Regular python here.
1419<!--
1420This is a multiline comment
1421with javascript and python
1422that should be ignored
1423-->
1424More javascript outside."#;
1425
1426        let ctx = create_context(content);
1427        let result = rule.check(&ctx).unwrap();
1428
1429        assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1430        assert_eq!(result[0].line, 1); // python
1431        assert_eq!(result[1].line, 7); // javascript
1432    }
1433
1434    #[test]
1435    fn test_fix_preserves_html_comments_when_disabled() {
1436        let config = MD044Config {
1437            names: vec!["JavaScript".to_string()],
1438            code_blocks: true,    // Check code blocks
1439            html_elements: true,  // Check HTML elements
1440            html_comments: false, // Don't check HTML comments
1441        };
1442        let rule = MD044ProperNames::from_config_struct(config);
1443
1444        let content = r#"javascript here.
1445<!-- javascript in comment -->
1446More javascript."#;
1447
1448        let ctx = create_context(content);
1449        let fixed = rule.fix(&ctx).unwrap();
1450
1451        let expected = r#"JavaScript here.
1452<!-- javascript in comment -->
1453More JavaScript."#;
1454
1455        assert_eq!(
1456            fixed, expected,
1457            "Should not fix names inside HTML comments when disabled"
1458        );
1459    }
1460
1461    #[test]
1462    fn test_proper_names_in_link_text_are_flagged() {
1463        let rule = MD044ProperNames::new(
1464            vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1465            true,
1466        );
1467
1468        let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1469
1470Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1471
1472Real javascript should be flagged.
1473
1474Also see the [typescript guide][ts-ref] for more.
1475
1476Real python should be flagged too.
1477
1478[ts-ref]: https://typescript.org/handbook"#;
1479
1480        let ctx = create_context(content);
1481        let result = rule.check(&ctx).unwrap();
1482
1483        // Link text should be checked, URLs should not be checked
1484        // Line 1: [javascript documentation] - "javascript" should be flagged
1485        // Line 3: [node.js homepage] - "node.js" should be flagged (matches "Node.js")
1486        // Line 3: [python tutorial] - "python" should be flagged
1487        // Line 5: standalone javascript
1488        // Line 9: standalone python
1489        assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1490
1491        // Verify line numbers for link text warnings
1492        let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1493        assert_eq!(line_1_warnings.len(), 1);
1494        assert!(
1495            line_1_warnings[0]
1496                .message
1497                .contains("'javascript' should be 'JavaScript'")
1498        );
1499
1500        let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1501        assert_eq!(line_3_warnings.len(), 2); // node.js and python
1502
1503        // Standalone warnings
1504        assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1505        assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1506    }
1507
1508    #[test]
1509    fn test_link_urls_not_flagged() {
1510        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1511
1512        // URL contains "javascript" but should NOT be flagged
1513        let content = r#"[Link Text](https://javascript.info/guide)"#;
1514
1515        let ctx = create_context(content);
1516        let result = rule.check(&ctx).unwrap();
1517
1518        // URL should not be checked
1519        assert!(result.is_empty(), "URLs should not be checked for proper names");
1520    }
1521
1522    #[test]
1523    fn test_proper_names_in_image_alt_text_are_flagged() {
1524        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1525
1526        let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.
1527
1528Real javascript should be flagged."#;
1529
1530        let ctx = create_context(content);
1531        let result = rule.check(&ctx).unwrap();
1532
1533        // Image alt text should be checked, URL and title should not be checked
1534        // Line 1: ![javascript logo] - "javascript" should be flagged
1535        // Line 3: standalone javascript
1536        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1537        assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1538        assert!(result[0].line == 1); // "![javascript logo]"
1539        assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1540        assert!(result[1].line == 3); // "Real javascript should be flagged."
1541    }
1542
1543    #[test]
1544    fn test_image_urls_not_flagged() {
1545        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1546
1547        // URL contains "javascript" but should NOT be flagged
1548        let content = r#"![Logo](https://javascript.info/logo.png)"#;
1549
1550        let ctx = create_context(content);
1551        let result = rule.check(&ctx).unwrap();
1552
1553        // Image URL should not be checked
1554        assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1555    }
1556
1557    #[test]
1558    fn test_reference_link_text_flagged_but_definition_not() {
1559        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1560
1561        let content = r#"Check the [javascript guide][js-ref] for details.
1562
1563Real javascript should be flagged.
1564
1565[js-ref]: https://javascript.info/typescript/guide"#;
1566
1567        let ctx = create_context(content);
1568        let result = rule.check(&ctx).unwrap();
1569
1570        // Link text should be checked, reference definitions should not
1571        // Line 1: [javascript guide] - should be flagged
1572        // Line 3: standalone javascript - should be flagged
1573        // Line 5: reference definition - should NOT be flagged
1574        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1575        assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1576        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1577    }
1578
1579    #[test]
1580    fn test_reference_definitions_not_flagged() {
1581        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1582
1583        // Reference definition should NOT be flagged
1584        let content = r#"[js-ref]: https://javascript.info/guide"#;
1585
1586        let ctx = create_context(content);
1587        let result = rule.check(&ctx).unwrap();
1588
1589        // Reference definition URLs should not be checked
1590        assert!(result.is_empty(), "Reference definitions should not be checked");
1591    }
1592
1593    #[test]
1594    fn test_wikilinks_text_is_flagged() {
1595        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1596
1597        // WikiLinks [[destination]] should have their text checked
1598        let content = r#"[[javascript]]
1599
1600Regular javascript here.
1601
1602[[JavaScript|display text]]"#;
1603
1604        let ctx = create_context(content);
1605        let result = rule.check(&ctx).unwrap();
1606
1607        // Line 1: [[javascript]] - should be flagged (WikiLink text)
1608        // Line 3: standalone javascript - should be flagged
1609        // Line 5: [[JavaScript|display text]] - correct capitalization, no flag
1610        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1611        assert!(
1612            result
1613                .iter()
1614                .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1615        );
1616        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1617    }
1618
1619    #[test]
1620    fn test_url_link_text_not_flagged() {
1621        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1622
1623        // Link text that is itself a URL should not be flagged
1624        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1625
1626[http://github.com/org/repo](http://github.com/org/repo)
1627
1628[www.github.com/org/repo](https://www.github.com/org/repo)"#;
1629
1630        let ctx = create_context(content);
1631        let result = rule.check(&ctx).unwrap();
1632
1633        assert!(
1634            result.is_empty(),
1635            "URL-like link text should not be flagged, got: {result:?}"
1636        );
1637    }
1638
1639    #[test]
1640    fn test_url_link_text_with_leading_space_not_flagged() {
1641        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1642
1643        // Leading/trailing whitespace in link text should be trimmed before URL check
1644        let content = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
1645
1646        let ctx = create_context(content);
1647        let result = rule.check(&ctx).unwrap();
1648
1649        assert!(
1650            result.is_empty(),
1651            "URL-like link text with leading space should not be flagged, got: {result:?}"
1652        );
1653    }
1654
1655    #[test]
1656    fn test_url_link_text_uppercase_scheme_not_flagged() {
1657        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1658
1659        let content = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
1660
1661        let ctx = create_context(content);
1662        let result = rule.check(&ctx).unwrap();
1663
1664        assert!(
1665            result.is_empty(),
1666            "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
1667        );
1668    }
1669
1670    #[test]
1671    fn test_non_url_link_text_still_flagged() {
1672        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1673
1674        // Link text that is NOT a URL should still be flagged
1675        let content = r#"[github.com/org/repo](https://github.com/org/repo)
1676
1677[Visit github](https://github.com/org/repo)
1678
1679[//github.com/org/repo](//github.com/org/repo)
1680
1681[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
1682
1683        let ctx = create_context(content);
1684        let result = rule.check(&ctx).unwrap();
1685
1686        assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
1687        assert!(result.iter().any(|w| w.line == 1)); // github.com (no protocol)
1688        assert!(result.iter().any(|w| w.line == 3)); // Visit github
1689        assert!(result.iter().any(|w| w.line == 5)); // //github.com (protocol-relative)
1690        assert!(result.iter().any(|w| w.line == 7)); // ftp://github.com
1691    }
1692
1693    #[test]
1694    fn test_url_link_text_fix_not_applied() {
1695        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1696
1697        let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";
1698
1699        let ctx = create_context(content);
1700        let result = rule.fix(&ctx).unwrap();
1701
1702        assert_eq!(result, content, "Fix should not modify URL-like link text");
1703    }
1704
1705    #[test]
1706    fn test_mixed_url_and_regular_link_text() {
1707        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1708
1709        // Mix of URL link text (should skip) and regular text (should flag)
1710        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1711
1712Visit [github documentation](https://github.com/docs) for details.
1713
1714[www.github.com/pricing](https://www.github.com/pricing)"#;
1715
1716        let ctx = create_context(content);
1717        let result = rule.check(&ctx).unwrap();
1718
1719        // Only line 3 should be flagged ("github documentation" is not a URL)
1720        assert_eq!(
1721            result.len(),
1722            1,
1723            "Only non-URL link text should be flagged, got: {result:?}"
1724        );
1725        assert_eq!(result[0].line, 3);
1726    }
1727
1728    #[test]
1729    fn test_html_attribute_values_not_flagged() {
1730        // Matches inside HTML tag attributes (between `<` and `>`) are not flagged.
1731        // Attribute values are not prose — they hold URLs, class names, data values, etc.
1732        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1733        let content = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
1734        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1735        let result = rule.check(&ctx).unwrap();
1736
1737        // Nothing on line 5 should be flagged — everything is inside the `<img ...>` tag
1738        let line5_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
1739        assert!(
1740            line5_violations.is_empty(),
1741            "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
1742        );
1743
1744        // Plain text on line 3 is still flagged
1745        let line3_violations: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1746        assert_eq!(line3_violations.len(), 1, "Plain 'test' on line 3 should be flagged");
1747    }
1748
1749    #[test]
1750    fn test_html_text_content_still_flagged() {
1751        // Text between HTML tags (not inside `<...>`) is still checked.
1752        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1753        let content = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
1754        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1755        let result = rule.check(&ctx).unwrap();
1756
1757        // "example.test" in the href attribute → not flagged (inside `<...>`)
1758        // "test link" in the anchor text → flagged (between `>` and `<`)
1759        assert_eq!(
1760            result.len(),
1761            1,
1762            "Should flag only 'test' in anchor text, not in href: {result:?}"
1763        );
1764        assert_eq!(result[0].column, 37, "Should flag col 37 ('test link' in anchor text)");
1765    }
1766
1767    #[test]
1768    fn test_html_attribute_various_not_flagged() {
1769        // All attribute types are ignored: src, href, alt, class, data-*, title, etc.
1770        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1771        let content = concat!(
1772            "# Heading\n\n",
1773            "<img src=\"test.png\" alt=\"test image\">\n",
1774            "<span class=\"test-class\" data-test=\"value\">test content</span>\n",
1775        );
1776        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1777        let result = rule.check(&ctx).unwrap();
1778
1779        // Only "test content" (between tags on line 4) should be flagged
1780        assert_eq!(
1781            result.len(),
1782            1,
1783            "Should flag only 'test content' between tags: {result:?}"
1784        );
1785        assert_eq!(result[0].line, 4);
1786    }
1787
1788    #[test]
1789    fn test_plain_text_underscore_boundary_unchanged() {
1790        // Plain text (outside HTML tags) still uses original word boundary semantics where
1791        // underscore is a boundary character, matching markdownlint's behavior via AST splitting.
1792        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1793        let content = "# Heading\n\ntest_image is here and just_test ends here\n";
1794        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1795        let result = rule.check(&ctx).unwrap();
1796
1797        // Both "test_image" (test at start) and "just_test" (test at end) are flagged
1798        // because in plain text, "_" is a word boundary
1799        assert_eq!(
1800            result.len(),
1801            2,
1802            "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
1803        );
1804        let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
1805        assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
1806        assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
1807    }
1808
1809    #[test]
1810    fn test_frontmatter_yaml_keys_not_flagged() {
1811        // YAML keys in frontmatter should NOT be checked for proper name violations.
1812        // Only values should be checked.
1813        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1814
1815        let content = "---\ntitle: Heading\ntest: Some Test value\n---\n\nTest\n";
1816        let ctx = create_context(content);
1817        let result = rule.check(&ctx).unwrap();
1818
1819        // "test" in the YAML key (line 3) should NOT be flagged
1820        // "Test" in the YAML value (line 3) is correct capitalization, no flag
1821        // "Test" in body (line 6) is correct capitalization, no flag
1822        assert!(
1823            result.is_empty(),
1824            "Should not flag YAML keys or correctly capitalized values: {result:?}"
1825        );
1826    }
1827
1828    #[test]
1829    fn test_frontmatter_yaml_values_flagged() {
1830        // Incorrectly capitalized names in YAML values should be flagged.
1831        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1832
1833        let content = "---\ntitle: Heading\nkey: a test value\n---\n\nTest\n";
1834        let ctx = create_context(content);
1835        let result = rule.check(&ctx).unwrap();
1836
1837        // "test" in the YAML value (line 3) SHOULD be flagged
1838        assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");
1839        assert_eq!(result[0].line, 3);
1840        assert_eq!(result[0].column, 8); // "key: a " = 7 chars, then "test" at column 8
1841    }
1842
1843    #[test]
1844    fn test_frontmatter_key_matches_name_not_flagged() {
1845        // A YAML key that happens to match a configured name should NOT be flagged.
1846        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1847
1848        let content = "---\ntest: other value\n---\n\nBody text\n";
1849        let ctx = create_context(content);
1850        let result = rule.check(&ctx).unwrap();
1851
1852        assert!(
1853            result.is_empty(),
1854            "Should not flag YAML key that matches configured name: {result:?}"
1855        );
1856    }
1857
1858    #[test]
1859    fn test_frontmatter_empty_value_not_flagged() {
1860        // YAML key with no value should be skipped entirely.
1861        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1862
1863        let content = "---\ntest:\ntest: \n---\n\nBody text\n";
1864        let ctx = create_context(content);
1865        let result = rule.check(&ctx).unwrap();
1866
1867        assert!(
1868            result.is_empty(),
1869            "Should not flag YAML keys with empty values: {result:?}"
1870        );
1871    }
1872
1873    #[test]
1874    fn test_frontmatter_nested_yaml_key_not_flagged() {
1875        // Nested/indented YAML keys should also be skipped.
1876        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1877
1878        let content = "---\nparent:\n  test: nested value\n---\n\nBody text\n";
1879        let ctx = create_context(content);
1880        let result = rule.check(&ctx).unwrap();
1881
1882        // "test" as a nested key should NOT be flagged
1883        assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
1884    }
1885
1886    #[test]
1887    fn test_frontmatter_list_items_checked() {
1888        // YAML list items are values and should be checked for proper names.
1889        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1890
1891        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
1892        let ctx = create_context(content);
1893        let result = rule.check(&ctx).unwrap();
1894
1895        // "test" as a list item value SHOULD be flagged
1896        assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
1897        assert_eq!(result[0].line, 3);
1898    }
1899
1900    #[test]
1901    fn test_frontmatter_value_with_multiple_colons() {
1902        // For "key: value: more", key is before first colon.
1903        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1904
1905        let content = "---\ntest: description: a test thing\n---\n\nBody text\n";
1906        let ctx = create_context(content);
1907        let result = rule.check(&ctx).unwrap();
1908
1909        // "test" as key should NOT be flagged
1910        // "test" in value portion ("description: a test thing") SHOULD be flagged
1911        assert_eq!(
1912            result.len(),
1913            1,
1914            "Should flag 'test' in value after first colon: {result:?}"
1915        );
1916        assert_eq!(result[0].line, 2);
1917        assert!(result[0].column > 6, "Violation column should be in value portion");
1918    }
1919
1920    #[test]
1921    fn test_frontmatter_does_not_affect_body() {
1922        // Body text after frontmatter should still be fully checked.
1923        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1924
1925        let content = "---\ntitle: Heading\n---\n\ntest should be flagged here\n";
1926        let ctx = create_context(content);
1927        let result = rule.check(&ctx).unwrap();
1928
1929        assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
1930        assert_eq!(result[0].line, 5);
1931    }
1932
1933    #[test]
1934    fn test_frontmatter_fix_corrects_values_preserves_keys() {
1935        // Fix should correct YAML values but preserve keys.
1936        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1937
1938        let content = "---\ntest: a test value\n---\n\ntest here\n";
1939        let ctx = create_context(content);
1940        let fixed = rule.fix(&ctx).unwrap();
1941
1942        // Key "test" should remain lowercase; value "test" should become "Test"
1943        assert_eq!(fixed, "---\ntest: a Test value\n---\n\nTest here\n");
1944    }
1945
1946    #[test]
1947    fn test_frontmatter_multiword_value_flagged() {
1948        // Multiple proper names in a single YAML value should all be flagged.
1949        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1950
1951        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
1952        let ctx = create_context(content);
1953        let result = rule.check(&ctx).unwrap();
1954
1955        assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
1956        assert!(result.iter().all(|w| w.line == 2));
1957    }
1958
1959    #[test]
1960    fn test_frontmatter_yaml_comments_not_checked() {
1961        // YAML comments inside frontmatter should be skipped entirely.
1962        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1963
1964        let content = "---\n# test comment\ntitle: Heading\n---\n\nBody text\n";
1965        let ctx = create_context(content);
1966        let result = rule.check(&ctx).unwrap();
1967
1968        assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
1969    }
1970
1971    #[test]
1972    fn test_frontmatter_delimiters_not_checked() {
1973        // Frontmatter delimiter lines (--- or +++) should never be checked.
1974        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1975
1976        let content = "---\ntitle: Heading\n---\n\ntest here\n";
1977        let ctx = create_context(content);
1978        let result = rule.check(&ctx).unwrap();
1979
1980        // Only the body "test" on line 5 should be flagged
1981        assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
1982        assert_eq!(result[0].line, 5);
1983    }
1984
1985    #[test]
1986    fn test_frontmatter_continuation_lines_checked() {
1987        // Continuation lines (indented, no colon) are value content and should be checked.
1988        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1989
1990        let content = "---\ndescription: >\n  a test value\n  continued here\n---\n\nBody\n";
1991        let ctx = create_context(content);
1992        let result = rule.check(&ctx).unwrap();
1993
1994        // "test" on the continuation line should be flagged
1995        assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
1996        assert_eq!(result[0].line, 3);
1997    }
1998
1999    #[test]
2000    fn test_frontmatter_quoted_values_checked() {
2001        // Quoted YAML values should have their content checked (inside the quotes).
2002        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2003
2004        let content = "---\ntitle: \"a test title\"\n---\n\nBody\n";
2005        let ctx = create_context(content);
2006        let result = rule.check(&ctx).unwrap();
2007
2008        assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
2009        assert_eq!(result[0].line, 2);
2010    }
2011
2012    #[test]
2013    fn test_frontmatter_single_quoted_values_checked() {
2014        // Single-quoted YAML values should have their content checked.
2015        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2016
2017        let content = "---\ntitle: 'a test title'\n---\n\nBody\n";
2018        let ctx = create_context(content);
2019        let result = rule.check(&ctx).unwrap();
2020
2021        assert_eq!(
2022            result.len(),
2023            1,
2024            "Should flag 'test' in single-quoted YAML value: {result:?}"
2025        );
2026        assert_eq!(result[0].line, 2);
2027    }
2028
2029    #[test]
2030    fn test_frontmatter_fix_multiword_values() {
2031        // Fix should correct all proper names in frontmatter values.
2032        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
2033
2034        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
2035        let ctx = create_context(content);
2036        let fixed = rule.fix(&ctx).unwrap();
2037
2038        assert_eq!(
2039            fixed,
2040            "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n"
2041        );
2042    }
2043
2044    #[test]
2045    fn test_frontmatter_fix_preserves_yaml_structure() {
2046        // Fix should preserve YAML structure while correcting values.
2047        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2048
2049        let content = "---\ntags:\n  - test\n  - other\ntitle: a test doc\n---\n\ntest body\n";
2050        let ctx = create_context(content);
2051        let fixed = rule.fix(&ctx).unwrap();
2052
2053        assert_eq!(
2054            fixed,
2055            "---\ntags:\n  - Test\n  - other\ntitle: a Test doc\n---\n\nTest body\n"
2056        );
2057    }
2058
2059    #[test]
2060    fn test_frontmatter_toml_delimiters_not_checked() {
2061        // TOML frontmatter with +++ delimiters should also be handled.
2062        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2063
2064        let content = "+++\ntitle = \"a test title\"\n+++\n\ntest body\n";
2065        let ctx = create_context(content);
2066        let result = rule.check(&ctx).unwrap();
2067
2068        // "title" as TOML key should NOT be flagged
2069        // "test" in TOML quoted value SHOULD be flagged (line 2)
2070        // "test" in body SHOULD be flagged (line 5)
2071        assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");
2072        let fm_violations: Vec<_> = result.iter().filter(|w| w.line == 2).collect();
2073        assert_eq!(fm_violations.len(), 1, "Should flag 'test' in TOML value: {result:?}");
2074        let body_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
2075        assert_eq!(body_violations.len(), 1, "Should flag body 'test': {result:?}");
2076    }
2077
2078    #[test]
2079    fn test_frontmatter_toml_key_not_flagged() {
2080        // TOML keys should NOT be flagged, only values.
2081        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2082
2083        let content = "+++\ntest = \"other value\"\n+++\n\nBody text\n";
2084        let ctx = create_context(content);
2085        let result = rule.check(&ctx).unwrap();
2086
2087        assert!(
2088            result.is_empty(),
2089            "Should not flag TOML key that matches configured name: {result:?}"
2090        );
2091    }
2092
2093    #[test]
2094    fn test_frontmatter_toml_fix_preserves_keys() {
2095        // Fix should correct TOML values but preserve keys.
2096        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2097
2098        let content = "+++\ntest = \"a test value\"\n+++\n\ntest here\n";
2099        let ctx = create_context(content);
2100        let fixed = rule.fix(&ctx).unwrap();
2101
2102        // Key "test" should remain lowercase; value "test" should become "Test"
2103        assert_eq!(fixed, "+++\ntest = \"a Test value\"\n+++\n\nTest here\n");
2104    }
2105
2106    #[test]
2107    fn test_frontmatter_list_item_mapping_key_not_flagged() {
2108        // In "- test: nested value", "test" is a YAML key within a list-item mapping.
2109        // The key should NOT be flagged; only the value should be checked.
2110        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2111
2112        let content = "---\nitems:\n  - test: nested value\n---\n\nBody text\n";
2113        let ctx = create_context(content);
2114        let result = rule.check(&ctx).unwrap();
2115
2116        assert!(
2117            result.is_empty(),
2118            "Should not flag YAML key in list-item mapping: {result:?}"
2119        );
2120    }
2121
2122    #[test]
2123    fn test_frontmatter_list_item_mapping_value_flagged() {
2124        // In "- key: test value", the value portion should be checked.
2125        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2126
2127        let content = "---\nitems:\n  - key: a test value\n---\n\nBody text\n";
2128        let ctx = create_context(content);
2129        let result = rule.check(&ctx).unwrap();
2130
2131        assert_eq!(
2132            result.len(),
2133            1,
2134            "Should flag 'test' in list-item mapping value: {result:?}"
2135        );
2136        assert_eq!(result[0].line, 3);
2137    }
2138
2139    #[test]
2140    fn test_frontmatter_bare_list_item_still_flagged() {
2141        // Bare list items without a colon (e.g., "- test") are values and should be flagged.
2142        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2143
2144        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
2145        let ctx = create_context(content);
2146        let result = rule.check(&ctx).unwrap();
2147
2148        assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
2149        assert_eq!(result[0].line, 3);
2150    }
2151
2152    #[test]
2153    fn test_frontmatter_flow_mapping_not_flagged() {
2154        // Flow mappings like {test: value} contain YAML keys that should not be flagged.
2155        // The entire flow construct should be skipped.
2156        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2157
2158        let content = "---\nflow_map: {test: value, other: test}\n---\n\nBody text\n";
2159        let ctx = create_context(content);
2160        let result = rule.check(&ctx).unwrap();
2161
2162        assert!(
2163            result.is_empty(),
2164            "Should not flag names inside flow mappings: {result:?}"
2165        );
2166    }
2167
2168    #[test]
2169    fn test_frontmatter_flow_sequence_not_flagged() {
2170        // Flow sequences like [test, other] should also be skipped.
2171        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2172
2173        let content = "---\nitems: [test, other, test]\n---\n\nBody text\n";
2174        let ctx = create_context(content);
2175        let result = rule.check(&ctx).unwrap();
2176
2177        assert!(
2178            result.is_empty(),
2179            "Should not flag names inside flow sequences: {result:?}"
2180        );
2181    }
2182
2183    #[test]
2184    fn test_frontmatter_list_item_mapping_fix_preserves_key() {
2185        // Fix should correct values in list-item mappings but preserve keys.
2186        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2187
2188        let content = "---\nitems:\n  - test: a test value\n---\n\ntest here\n";
2189        let ctx = create_context(content);
2190        let fixed = rule.fix(&ctx).unwrap();
2191
2192        // "test" as list-item key should remain lowercase;
2193        // "test" in value portion should become "Test"
2194        assert_eq!(fixed, "---\nitems:\n  - test: a Test value\n---\n\nTest here\n");
2195    }
2196
2197    #[test]
2198    fn test_frontmatter_backtick_code_not_flagged() {
2199        // Names inside backticks in frontmatter should NOT be flagged when code_blocks=false.
2200        let config = MD044Config {
2201            names: vec!["GoodApplication".to_string()],
2202            code_blocks: false,
2203            ..MD044Config::default()
2204        };
2205        let rule = MD044ProperNames::from_config_struct(config);
2206
2207        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2208        let ctx = create_context(content);
2209        let result = rule.check(&ctx).unwrap();
2210
2211        // Neither the frontmatter nor the body backtick-wrapped name should be flagged
2212        assert!(
2213            result.is_empty(),
2214            "Should not flag names inside backticks in frontmatter or body: {result:?}"
2215        );
2216    }
2217
2218    #[test]
2219    fn test_frontmatter_unquoted_backtick_code_not_flagged() {
2220        // Exact case from issue #513: unquoted YAML frontmatter with backticks
2221        let config = MD044Config {
2222            names: vec!["GoodApplication".to_string()],
2223            code_blocks: false,
2224            ..MD044Config::default()
2225        };
2226        let rule = MD044ProperNames::from_config_struct(config);
2227
2228        let content = "---\ntitle: `goodapplication` CLI\n---\n\nIntroductory `goodapplication` CLI text.\n";
2229        let ctx = create_context(content);
2230        let result = rule.check(&ctx).unwrap();
2231
2232        assert!(
2233            result.is_empty(),
2234            "Should not flag names inside backticks in unquoted YAML frontmatter: {result:?}"
2235        );
2236    }
2237
2238    #[test]
2239    fn test_frontmatter_bare_name_still_flagged_with_backtick_nearby() {
2240        // Names outside backticks in frontmatter should still be flagged.
2241        let config = MD044Config {
2242            names: vec!["GoodApplication".to_string()],
2243            code_blocks: false,
2244            ..MD044Config::default()
2245        };
2246        let rule = MD044ProperNames::from_config_struct(config);
2247
2248        let content = "---\ntitle: goodapplication `goodapplication` CLI\n---\n\nBody\n";
2249        let ctx = create_context(content);
2250        let result = rule.check(&ctx).unwrap();
2251
2252        // Only the bare "goodapplication" (before backticks) should be flagged
2253        assert_eq!(
2254            result.len(),
2255            1,
2256            "Should flag bare name but not backtick-wrapped name: {result:?}"
2257        );
2258        assert_eq!(result[0].line, 2);
2259        assert_eq!(result[0].column, 8); // "title: " = 7 chars, name at column 8
2260    }
2261
2262    #[test]
2263    fn test_frontmatter_backtick_code_with_code_blocks_true() {
2264        // When code_blocks=true, names inside backticks ARE checked.
2265        let config = MD044Config {
2266            names: vec!["GoodApplication".to_string()],
2267            code_blocks: true,
2268            ..MD044Config::default()
2269        };
2270        let rule = MD044ProperNames::from_config_struct(config);
2271
2272        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nBody\n";
2273        let ctx = create_context(content);
2274        let result = rule.check(&ctx).unwrap();
2275
2276        // With code_blocks=true, backtick-wrapped name SHOULD be flagged
2277        assert_eq!(
2278            result.len(),
2279            1,
2280            "Should flag backtick-wrapped name when code_blocks=true: {result:?}"
2281        );
2282        assert_eq!(result[0].line, 2);
2283    }
2284
2285    #[test]
2286    fn test_frontmatter_fix_preserves_backtick_code() {
2287        // Fix should NOT change names inside backticks in frontmatter.
2288        let config = MD044Config {
2289            names: vec!["GoodApplication".to_string()],
2290            code_blocks: false,
2291            ..MD044Config::default()
2292        };
2293        let rule = MD044ProperNames::from_config_struct(config);
2294
2295        let content = "---\ntitle: \"`goodapplication` CLI\"\n---\n\nIntroductory `goodapplication` CLI text.\n";
2296        let ctx = create_context(content);
2297        let fixed = rule.fix(&ctx).unwrap();
2298
2299        // Neither backtick-wrapped occurrence should be changed
2300        assert_eq!(
2301            fixed, content,
2302            "Fix should not modify names inside backticks in frontmatter"
2303        );
2304    }
2305
2306    // --- Angle-bracket URL tests (issue #457) ---
2307
2308    #[test]
2309    fn test_angle_bracket_url_in_html_comment_not_flagged() {
2310        // Angle-bracket URLs inside HTML comments should be skipped
2311        let config = MD044Config {
2312            names: vec!["Test".to_string()],
2313            ..MD044Config::default()
2314        };
2315        let rule = MD044ProperNames::from_config_struct(config);
2316
2317        let content = "---\ntitle: Level 1 heading\n---\n\n<https://www.example.test>\n\n<!-- This is a Test https://www.example.test -->\n<!-- This is a Test <https://www.example.test> -->\n";
2318        let ctx = create_context(content);
2319        let result = rule.check(&ctx).unwrap();
2320
2321        // Line 7: "Test" in comment prose before bare URL -- already correct capitalization
2322        // Line 7: "test" in bare URL (not in angle brackets) -- but "test" is in URL domain, not prose.
2323        //   However, .example.test has "test" at a word boundary (after '.'), so it IS flagged.
2324        // Line 8: "Test" in comment prose -- correct capitalization, not flagged
2325        // Line 8: "test" in <https://www.example.test> -- inside angle-bracket URL, NOT flagged
2326
2327        // The key assertion: line 8's angle-bracket URL should NOT produce a warning
2328        let line8_warnings: Vec<_> = result.iter().filter(|w| w.line == 8).collect();
2329        assert!(
2330            line8_warnings.is_empty(),
2331            "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
2332        );
2333    }
2334
2335    #[test]
2336    fn test_bare_url_in_html_comment_still_flagged() {
2337        // Bare URLs (not in angle brackets) inside HTML comments should still be checked
2338        let config = MD044Config {
2339            names: vec!["Test".to_string()],
2340            ..MD044Config::default()
2341        };
2342        let rule = MD044ProperNames::from_config_struct(config);
2343
2344        let content = "<!-- This is a test https://www.example.test -->\n";
2345        let ctx = create_context(content);
2346        let result = rule.check(&ctx).unwrap();
2347
2348        // "test" appears as prose text before URL and also in the bare URL domain
2349        // At minimum, the prose "test" should be flagged
2350        assert!(
2351            !result.is_empty(),
2352            "Should flag 'test' in prose text of HTML comment with bare URL"
2353        );
2354    }
2355
2356    #[test]
2357    fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
2358        // Angle-bracket URLs in regular markdown are already handled by the link parser,
2359        // but the angle-bracket check provides a safety net
2360        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2361
2362        let content = "<https://www.example.test>\n";
2363        let ctx = create_context(content);
2364        let result = rule.check(&ctx).unwrap();
2365
2366        assert!(
2367            result.is_empty(),
2368            "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
2369        );
2370    }
2371
2372    #[test]
2373    fn test_multiple_angle_bracket_urls_in_one_comment() {
2374        let config = MD044Config {
2375            names: vec!["Test".to_string()],
2376            ..MD044Config::default()
2377        };
2378        let rule = MD044ProperNames::from_config_struct(config);
2379
2380        let content = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
2381        let ctx = create_context(content);
2382        let result = rule.check(&ctx).unwrap();
2383
2384        // Both URLs are inside angle brackets, so "test" inside them should NOT be flagged
2385        assert!(
2386            result.is_empty(),
2387            "Should not flag names inside multiple angle-bracket URLs: {result:?}"
2388        );
2389    }
2390
2391    #[test]
2392    fn test_angle_bracket_non_url_still_flagged() {
2393        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2394        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2395        assert!(
2396            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2397            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2398        );
2399    }
2400
2401    #[test]
2402    fn test_angle_bracket_mailto_url_not_flagged() {
2403        let config = MD044Config {
2404            names: vec!["Test".to_string()],
2405            ..MD044Config::default()
2406        };
2407        let rule = MD044ProperNames::from_config_struct(config);
2408
2409        let content = "<!-- Contact <mailto:test@example.com> for help -->\n";
2410        let ctx = create_context(content);
2411        let result = rule.check(&ctx).unwrap();
2412
2413        assert!(
2414            result.is_empty(),
2415            "Should not flag names inside angle-bracket mailto URLs: {result:?}"
2416        );
2417    }
2418
2419    #[test]
2420    fn test_angle_bracket_ftp_url_not_flagged() {
2421        let config = MD044Config {
2422            names: vec!["Test".to_string()],
2423            ..MD044Config::default()
2424        };
2425        let rule = MD044ProperNames::from_config_struct(config);
2426
2427        let content = "<!-- Download from <ftp://test.example.com/file> -->\n";
2428        let ctx = create_context(content);
2429        let result = rule.check(&ctx).unwrap();
2430
2431        assert!(
2432            result.is_empty(),
2433            "Should not flag names inside angle-bracket FTP URLs: {result:?}"
2434        );
2435    }
2436
2437    #[test]
2438    fn test_angle_bracket_url_fix_preserves_url() {
2439        // Fix should not modify text inside angle-bracket URLs
2440        let config = MD044Config {
2441            names: vec!["Test".to_string()],
2442            ..MD044Config::default()
2443        };
2444        let rule = MD044ProperNames::from_config_struct(config);
2445
2446        let content = "<!-- test text <https://www.example.test> -->\n";
2447        let ctx = create_context(content);
2448        let fixed = rule.fix(&ctx).unwrap();
2449
2450        // "test" in prose should be fixed, URL should be preserved
2451        assert!(
2452            fixed.contains("<https://www.example.test>"),
2453            "Fix should preserve angle-bracket URLs: {fixed}"
2454        );
2455        assert!(
2456            fixed.contains("Test text"),
2457            "Fix should correct prose 'test' to 'Test': {fixed}"
2458        );
2459    }
2460
2461    #[test]
2462    fn test_is_in_angle_bracket_url_helper() {
2463        // Direct tests of the helper function
2464        let line = "text <https://example.test> more text";
2465
2466        // Inside the URL
2467        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 5)); // '<'
2468        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 6)); // 'h'
2469        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 15)); // middle of URL
2470        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 26)); // '>'
2471
2472        // Outside the URL
2473        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 0)); // 't' at start
2474        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 4)); // space before '<'
2475        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 27)); // space after '>'
2476
2477        // Non-URL angle brackets
2478        assert!(!MD044ProperNames::is_in_angle_bracket_url("<notaurl>", 1));
2479
2480        // mailto scheme
2481        assert!(MD044ProperNames::is_in_angle_bracket_url(
2482            "<mailto:test@example.com>",
2483            10
2484        ));
2485
2486        // ftp scheme
2487        assert!(MD044ProperNames::is_in_angle_bracket_url(
2488            "<ftp://test.example.com>",
2489            10
2490        ));
2491    }
2492
2493    #[test]
2494    fn test_is_in_angle_bracket_url_uppercase_scheme() {
2495        // RFC 3986: URI schemes are case-insensitive
2496        assert!(MD044ProperNames::is_in_angle_bracket_url(
2497            "<HTTPS://test.example.com>",
2498            10
2499        ));
2500        assert!(MD044ProperNames::is_in_angle_bracket_url(
2501            "<Http://test.example.com>",
2502            10
2503        ));
2504    }
2505
2506    #[test]
2507    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2508        // ssh scheme
2509        assert!(MD044ProperNames::is_in_angle_bracket_url(
2510            "<ssh://test@example.com>",
2511            10
2512        ));
2513        // file scheme
2514        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2515        // data scheme (no authority, just colon)
2516        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2517    }
2518
2519    #[test]
2520    fn test_is_in_angle_bracket_url_unclosed() {
2521        // Unclosed angle bracket should NOT match
2522        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2523            "<https://test.example.com",
2524            10
2525        ));
2526    }
2527
2528    #[test]
2529    fn test_vale_inline_config_comments_not_flagged() {
2530        let config = MD044Config {
2531            names: vec!["Vale".to_string(), "JavaScript".to_string()],
2532            ..MD044Config::default()
2533        };
2534        let rule = MD044ProperNames::from_config_struct(config);
2535
2536        let content = "\
2537<!-- vale off -->
2538Some javascript text here.
2539<!-- vale on -->
2540<!-- vale Style.Rule = NO -->
2541More javascript text.
2542<!-- vale Style.Rule = YES -->
2543<!-- vale JavaScript.Grammar = NO -->
2544";
2545        let ctx = create_context(content);
2546        let result = rule.check(&ctx).unwrap();
2547
2548        // Only the body text lines (2, 5) should be flagged for "javascript"
2549        assert_eq!(result.len(), 2, "Should only flag body lines, not Vale config comments");
2550        assert_eq!(result[0].line, 2);
2551        assert_eq!(result[1].line, 5);
2552    }
2553
2554    #[test]
2555    fn test_remark_lint_inline_config_comments_not_flagged() {
2556        let config = MD044Config {
2557            names: vec!["JavaScript".to_string()],
2558            ..MD044Config::default()
2559        };
2560        let rule = MD044ProperNames::from_config_struct(config);
2561
2562        let content = "\
2563<!-- lint disable remark-lint-some-rule -->
2564Some javascript text here.
2565<!-- lint enable remark-lint-some-rule -->
2566<!-- lint ignore remark-lint-some-rule -->
2567More javascript text.
2568";
2569        let ctx = create_context(content);
2570        let result = rule.check(&ctx).unwrap();
2571
2572        assert_eq!(
2573            result.len(),
2574            2,
2575            "Should only flag body lines, not remark-lint config comments"
2576        );
2577        assert_eq!(result[0].line, 2);
2578        assert_eq!(result[1].line, 5);
2579    }
2580
2581    #[test]
2582    fn test_fix_does_not_modify_vale_remark_lint_comments() {
2583        let config = MD044Config {
2584            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2585            ..MD044Config::default()
2586        };
2587        let rule = MD044ProperNames::from_config_struct(config);
2588
2589        let content = "\
2590<!-- vale off -->
2591Some javascript text.
2592<!-- vale on -->
2593<!-- lint disable remark-lint-some-rule -->
2594More javascript text.
2595<!-- lint enable remark-lint-some-rule -->
2596";
2597        let ctx = create_context(content);
2598        let fixed = rule.fix(&ctx).unwrap();
2599
2600        // Config directive lines must be preserved unchanged
2601        assert!(fixed.contains("<!-- vale off -->"));
2602        assert!(fixed.contains("<!-- vale on -->"));
2603        assert!(fixed.contains("<!-- lint disable remark-lint-some-rule -->"));
2604        assert!(fixed.contains("<!-- lint enable remark-lint-some-rule -->"));
2605        // Body text should be fixed
2606        assert!(fixed.contains("Some JavaScript text."));
2607        assert!(fixed.contains("More JavaScript text."));
2608    }
2609
2610    #[test]
2611    fn test_mixed_tool_directives_all_skipped() {
2612        let config = MD044Config {
2613            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2614            ..MD044Config::default()
2615        };
2616        let rule = MD044ProperNames::from_config_struct(config);
2617
2618        let content = "\
2619<!-- rumdl-disable MD044 -->
2620Some javascript text.
2621<!-- markdownlint-disable -->
2622More javascript text.
2623<!-- vale off -->
2624Even more javascript text.
2625<!-- lint disable some-rule -->
2626Final javascript text.
2627<!-- rumdl-enable MD044 -->
2628<!-- markdownlint-enable -->
2629<!-- vale on -->
2630<!-- lint enable some-rule -->
2631";
2632        let ctx = create_context(content);
2633        let result = rule.check(&ctx).unwrap();
2634
2635        // Only body text lines should be flagged (lines 2, 4, 6, 8)
2636        assert_eq!(
2637            result.len(),
2638            4,
2639            "Should only flag body lines, not any tool directive comments"
2640        );
2641        assert_eq!(result[0].line, 2);
2642        assert_eq!(result[1].line, 4);
2643        assert_eq!(result[2].line, 6);
2644        assert_eq!(result[3].line, 8);
2645    }
2646
2647    #[test]
2648    fn test_vale_remark_lint_edge_cases_not_matched() {
2649        let config = MD044Config {
2650            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2651            ..MD044Config::default()
2652        };
2653        let rule = MD044ProperNames::from_config_struct(config);
2654
2655        // These are regular HTML comments, NOT tool directives:
2656        // - "<!-- vale -->" is not a valid Vale directive (no action keyword)
2657        // - "<!-- vale is a tool -->" starts with "vale" but is prose, not a directive
2658        // - "<!-- valedictorian javascript -->" does not start with "<!-- vale "
2659        // - "<!-- linting javascript tips -->" does not start with "<!-- lint "
2660        // - "<!-- vale javascript -->" starts with "vale" but has no action keyword
2661        // - "<!-- lint your javascript code -->" starts with "lint" but has no action keyword
2662        let content = "\
2663<!-- vale -->
2664<!-- vale is a tool for writing -->
2665<!-- valedictorian javascript -->
2666<!-- linting javascript tips -->
2667<!-- vale javascript -->
2668<!-- lint your javascript code -->
2669";
2670        let ctx = create_context(content);
2671        let result = rule.check(&ctx).unwrap();
2672
2673        // Line 1: "<!-- vale -->" contains "vale" (wrong case for "Vale") -> flagged
2674        // Line 2: "<!-- vale is a tool for writing -->" contains "vale" -> flagged
2675        // Line 3: "<!-- valedictorian javascript -->" contains "javascript" -> flagged
2676        // Line 4: "<!-- linting javascript tips -->" contains "javascript" -> flagged
2677        // Line 5: "<!-- vale javascript -->" contains "vale" and "javascript" -> flagged for both
2678        // Line 6: "<!-- lint your javascript code -->" contains "javascript" -> flagged
2679        assert_eq!(
2680            result.len(),
2681            7,
2682            "Should flag proper names in non-directive HTML comments: got {result:?}"
2683        );
2684        assert_eq!(result[0].line, 1); // "vale" in <!-- vale -->
2685        assert_eq!(result[1].line, 2); // "vale" in <!-- vale is a tool -->
2686        assert_eq!(result[2].line, 3); // "javascript" in <!-- valedictorian javascript -->
2687        assert_eq!(result[3].line, 4); // "javascript" in <!-- linting javascript tips -->
2688        assert_eq!(result[4].line, 5); // "vale" in <!-- vale javascript -->
2689        assert_eq!(result[5].line, 5); // "javascript" in <!-- vale javascript -->
2690        assert_eq!(result[6].line, 6); // "javascript" in <!-- lint your javascript code -->
2691    }
2692
2693    #[test]
2694    fn test_vale_style_directives_skipped() {
2695        let config = MD044Config {
2696            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2697            ..MD044Config::default()
2698        };
2699        let rule = MD044ProperNames::from_config_struct(config);
2700
2701        // These ARE valid Vale directives and should be skipped:
2702        let content = "\
2703<!-- vale style = MyStyle -->
2704<!-- vale styles = Style1, Style2 -->
2705<!-- vale MyRule.Name = YES -->
2706<!-- vale MyRule.Name = NO -->
2707Some javascript text.
2708";
2709        let ctx = create_context(content);
2710        let result = rule.check(&ctx).unwrap();
2711
2712        // Only line 5 (body text) should be flagged
2713        assert_eq!(
2714            result.len(),
2715            1,
2716            "Should only flag body lines, not Vale style/rule directives: got {result:?}"
2717        );
2718        assert_eq!(result[0].line, 5);
2719    }
2720
2721    // --- is_in_backtick_code_in_line unit tests ---
2722
2723    #[test]
2724    fn test_backtick_code_single_backticks() {
2725        let line = "hello `world` bye";
2726        // 'w' is at index 7, inside the backtick span (content between backticks at 6 and 12)
2727        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 7));
2728        // 'h' at index 0 is outside
2729        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2730        // 'b' at index 14 is outside
2731        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 14));
2732    }
2733
2734    #[test]
2735    fn test_backtick_code_double_backticks() {
2736        let line = "a ``code`` b";
2737        // 'c' is at index 4, inside ``...``
2738        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2739        // 'a' at index 0 is outside
2740        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2741        // 'b' at index 11 is outside
2742        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 11));
2743    }
2744
2745    #[test]
2746    fn test_backtick_code_unclosed() {
2747        let line = "a `code b";
2748        // No closing backtick, so nothing is a code span
2749        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2750    }
2751
2752    #[test]
2753    fn test_backtick_code_mismatched_count() {
2754        // Single backtick opening, double backtick is not a match
2755        let line = "a `code`` b";
2756        // The single ` at index 2 doesn't match `` at index 7-8
2757        // So 'c' at index 3 is NOT in a code span
2758        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 3));
2759    }
2760
2761    #[test]
2762    fn test_backtick_code_multiple_spans() {
2763        let line = "`first` and `second`";
2764        // 'f' at index 1 (inside first span)
2765        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2766        // 'a' at index 8 (between spans)
2767        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 8));
2768        // 's' at index 13 (inside second span)
2769        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 13));
2770    }
2771
2772    #[test]
2773    fn test_backtick_code_on_backtick_boundary() {
2774        let line = "`code`";
2775        // Position 0 is the opening backtick itself, not inside the span
2776        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 0));
2777        // Position 5 is the closing backtick, not inside the span
2778        assert!(!MD044ProperNames::is_in_backtick_code_in_line(line, 5));
2779        // Position 1-4 are inside the span
2780        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 1));
2781        assert!(MD044ProperNames::is_in_backtick_code_in_line(line, 4));
2782    }
2783}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs