rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_fancy_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
/// Location of one violation, as `(line, column, found_name)`.
///
/// As produced in this file, `line` is 1-based, `column` is the 1-based byte
/// offset of the match within its line, and `found_name` is the text as written
/// in the document.
type WarningPosition = (usize, usize, String); // (line, column, found_name)
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
67/// Check if a trimmed line is an inline config comment from a linting tool.
68/// Recognized tools: rumdl, markdownlint, Vale, and remark-lint.
69fn is_inline_config_comment(trimmed: &str) -> bool {
70    trimmed.starts_with("<!-- rumdl-")
71        || trimmed.starts_with("<!-- markdownlint-")
72        || trimmed.starts_with("<!-- vale off")
73        || trimmed.starts_with("<!-- vale on")
74        || (trimmed.starts_with("<!-- vale ") && trimmed.contains(" = "))
75        || trimmed.starts_with("<!-- vale style")
76        || trimmed.starts_with("<!-- lint disable ")
77        || trimmed.starts_with("<!-- lint enable ")
78        || trimmed.starts_with("<!-- lint ignore ")
79}
80
81#[derive(Clone)]
82pub struct MD044ProperNames {
83    config: MD044Config,
84    // Cache the combined regex pattern string
85    combined_pattern: Option<String>,
86    // Precomputed lowercase name variants for fast pre-checks
87    name_variants: Vec<String>,
88    // Cache for name violations by content hash
89    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
90}
91
92impl MD044ProperNames {
93    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
94        let config = MD044Config {
95            names,
96            code_blocks,
97            html_elements: true, // Default to checking HTML elements
98            html_comments: true, // Default to checking HTML comments
99        };
100        let combined_pattern = Self::create_combined_pattern(&config);
101        let name_variants = Self::build_name_variants(&config);
102        Self {
103            config,
104            combined_pattern,
105            name_variants,
106            content_cache: Arc::new(Mutex::new(HashMap::new())),
107        }
108    }
109
110    // Helper function for consistent ASCII normalization
111    fn ascii_normalize(s: &str) -> String {
112        s.replace(['é', 'è', 'ê', 'ë'], "e")
113            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
114            .replace(['ï', 'î', 'í', 'ì'], "i")
115            .replace(['ü', 'ú', 'ù', 'û'], "u")
116            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
117            .replace('ñ', "n")
118            .replace('ç', "c")
119    }
120
121    pub fn from_config_struct(config: MD044Config) -> Self {
122        let combined_pattern = Self::create_combined_pattern(&config);
123        let name_variants = Self::build_name_variants(&config);
124        Self {
125            config,
126            combined_pattern,
127            name_variants,
128            content_cache: Arc::new(Mutex::new(HashMap::new())),
129        }
130    }
131
132    // Create a combined regex pattern for all proper names
133    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
134        if config.names.is_empty() {
135            return None;
136        }
137
138        // Create patterns for all names and their variations
139        let mut patterns: Vec<String> = config
140            .names
141            .iter()
142            .flat_map(|name| {
143                let mut variations = vec![];
144                let lower_name = name.to_lowercase();
145
146                // Add the lowercase version
147                variations.push(escape_regex(&lower_name));
148
149                // Add version without dots
150                let lower_name_no_dots = lower_name.replace('.', "");
151                if lower_name != lower_name_no_dots {
152                    variations.push(escape_regex(&lower_name_no_dots));
153                }
154
155                // Add ASCII-normalized versions for common accented characters
156                let ascii_normalized = Self::ascii_normalize(&lower_name);
157
158                if ascii_normalized != lower_name {
159                    variations.push(escape_regex(&ascii_normalized));
160
161                    // Also add version without dots
162                    let ascii_no_dots = ascii_normalized.replace('.', "");
163                    if ascii_normalized != ascii_no_dots {
164                        variations.push(escape_regex(&ascii_no_dots));
165                    }
166                }
167
168                variations
169            })
170            .collect();
171
172        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
173        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
174
175        // Combine all patterns into a single regex with capture groups
176        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
177        Some(format!(r"(?i)({})", patterns.join("|")))
178    }
179
180    fn build_name_variants(config: &MD044Config) -> Vec<String> {
181        let mut variants = HashSet::new();
182        for name in &config.names {
183            let lower_name = name.to_lowercase();
184            variants.insert(lower_name.clone());
185
186            let lower_no_dots = lower_name.replace('.', "");
187            if lower_name != lower_no_dots {
188                variants.insert(lower_no_dots);
189            }
190
191            let ascii_normalized = Self::ascii_normalize(&lower_name);
192            if ascii_normalized != lower_name {
193                variants.insert(ascii_normalized.clone());
194
195                let ascii_no_dots = ascii_normalized.replace('.', "");
196                if ascii_normalized != ascii_no_dots {
197                    variants.insert(ascii_no_dots);
198                }
199            }
200        }
201
202        variants.into_iter().collect()
203    }
204
205    // Find all name violations in the content and return positions.
206    // `content_lower` is the pre-computed lowercase version of `content` to avoid redundant allocations.
207    fn find_name_violations(
208        &self,
209        content: &str,
210        ctx: &crate::lint_context::LintContext,
211        content_lower: &str,
212    ) -> Vec<WarningPosition> {
213        // Early return: if no names configured or content is empty
214        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
215            return Vec::new();
216        }
217
218        // Early return: quick check if any of the configured names might be in content
219        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
220
221        if !has_potential_matches {
222            return Vec::new();
223        }
224
225        // Check if we have cached results
226        let hash = fast_hash(content);
227        {
228            // Use a separate scope for borrowing to minimize lock time
229            if let Ok(cache) = self.content_cache.lock()
230                && let Some(cached) = cache.get(&hash)
231            {
232                return cached.clone();
233            }
234        }
235
236        let mut violations = Vec::new();
237
238        // Get the regex from global cache
239        let combined_regex = match &self.combined_pattern {
240            Some(pattern) => match get_cached_fancy_regex(pattern) {
241                Ok(regex) => regex,
242                Err(_) => return Vec::new(),
243            },
244            None => return Vec::new(),
245        };
246
247        // Use ctx.lines for better performance
248        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
249            let line_num = line_idx + 1;
250            let line = line_info.content(ctx.content);
251
252            // Skip code fence lines (```language or ~~~language)
253            let trimmed = line.trim_start();
254            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
255                continue;
256            }
257
258            // Skip if in code block (when code_blocks = false)
259            if !self.config.code_blocks && line_info.in_code_block {
260                continue;
261            }
262
263            // Skip if in HTML block (when html_elements = false)
264            if !self.config.html_elements && line_info.in_html_block {
265                continue;
266            }
267
268            // Skip HTML comments using pre-computed line flag
269            if !self.config.html_comments && line_info.in_html_comment {
270                continue;
271            }
272
273            // Skip JSX expressions and MDX comments (MDX flavor)
274            if line_info.in_jsx_expression || line_info.in_mdx_comment {
275                continue;
276            }
277
278            // Skip Obsidian comments (Obsidian flavor)
279            if line_info.in_obsidian_comment {
280                continue;
281            }
282
283            // For frontmatter lines, determine offset where checkable value content starts.
284            // YAML keys should not be checked against proper names - only values.
285            let fm_value_offset = if line_info.in_front_matter {
286                Self::frontmatter_value_offset(line)
287            } else {
288                0
289            };
290            if fm_value_offset == usize::MAX {
291                continue;
292            }
293
294            // Skip inline config comments (rumdl, markdownlint, Vale, remark-lint directives)
295            if is_inline_config_comment(trimmed) {
296                continue;
297            }
298
299            // Early return: skip lines that don't contain any potential matches
300            let line_lower = line.to_lowercase();
301            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
302
303            if !has_line_matches {
304                continue;
305            }
306
307            // Use the combined regex to find all matches in one pass
308            for cap_result in combined_regex.find_iter(line) {
309                match cap_result {
310                    Ok(cap) => {
311                        let found_name = &line[cap.start()..cap.end()];
312
313                        // Check word boundaries manually for Unicode support
314                        let start_pos = cap.start();
315                        let end_pos = cap.end();
316
317                        // Skip matches in the key portion of frontmatter lines
318                        if start_pos < fm_value_offset {
319                            continue;
320                        }
321
322                        // Skip matches inside HTML tag attributes (handles multi-line tags)
323                        let byte_pos = line_info.byte_offset + start_pos;
324                        if ctx.is_in_html_tag(byte_pos) {
325                            continue;
326                        }
327
328                        if !Self::is_at_word_boundary(line, start_pos, true)
329                            || !Self::is_at_word_boundary(line, end_pos, false)
330                        {
331                            continue; // Not at word boundary
332                        }
333
334                        // Skip if in inline code when code_blocks is false
335                        if !self.config.code_blocks && ctx.is_in_code_block_or_span(byte_pos) {
336                            continue;
337                        }
338
339                        // Skip if in link URL or reference definition
340                        if Self::is_in_link(ctx, byte_pos) {
341                            continue;
342                        }
343
344                        // Skip if inside an angle-bracket URL (e.g., <https://...>)
345                        // The link parser skips autolinks inside HTML comments,
346                        // so we detect them directly in the line text.
347                        if Self::is_in_angle_bracket_url(line, start_pos) {
348                            continue;
349                        }
350
351                        // Find which proper name this matches
352                        if let Some(proper_name) = self.get_proper_name_for(found_name) {
353                            // Only flag if it's not already correct
354                            if found_name != proper_name {
355                                violations.push((line_num, cap.start() + 1, found_name.to_string()));
356                            }
357                        }
358                    }
359                    Err(e) => {
360                        eprintln!("Regex execution error on line {line_num}: {e}");
361                    }
362                }
363            }
364        }
365
366        // Store in cache (ignore if mutex is poisoned)
367        if let Ok(mut cache) = self.content_cache.lock() {
368            cache.insert(hash, violations.clone());
369        }
370        violations
371    }
372
373    /// Check if a byte position is within a link URL (not link text)
374    ///
375    /// Link text should be checked for proper names, but URLs should be skipped.
376    /// For `[text](url)` - check text, skip url
377    /// For `[text][ref]` - check text, skip reference portion
378    /// For `[[text]]` (WikiLinks) - check text, skip brackets
379    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
380        use pulldown_cmark::LinkType;
381
382        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
383        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
384        if link_idx > 0 {
385            let link = &ctx.links[link_idx - 1];
386            if byte_pos < link.byte_end {
387                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
388                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
389                    link.byte_offset + 2
390                } else {
391                    link.byte_offset + 1
392                };
393                let text_end = text_start + link.text.len();
394
395                // If position is within the text portion, skip only if text is a URL
396                if byte_pos >= text_start && byte_pos < text_end {
397                    return Self::link_text_is_url(&link.text);
398                }
399                // Position is in the URL/reference portion, skip it
400                return true;
401            }
402        }
403
404        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
405        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
406        if image_idx > 0 {
407            let image = &ctx.images[image_idx - 1];
408            if byte_pos < image.byte_end {
409                // Image starts with '![' so alt text starts at byte_offset + 2
410                let alt_start = image.byte_offset + 2;
411                let alt_end = alt_start + image.alt_text.len();
412
413                // If position is within the alt text portion, don't skip
414                if byte_pos >= alt_start && byte_pos < alt_end {
415                    return false;
416                }
417                // Position is in the URL/reference portion, skip it
418                return true;
419            }
420        }
421
422        // Check pre-computed reference definitions
423        ctx.is_in_reference_def(byte_pos)
424    }
425
426    /// Check if link text is a URL that should not have proper name corrections.
427    /// Matches markdownlint behavior: skip text starting with `http://`, `https://`, or `www.`.
428    fn link_text_is_url(text: &str) -> bool {
429        let lower = text.trim().to_ascii_lowercase();
430        lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
431    }
432
433    /// Check if a position within a line falls inside an angle-bracket URL (`<scheme://...>`).
434    ///
435    /// The link parser skips autolinks inside HTML comments, so `ctx.links` won't
436    /// contain them. This function detects angle-bracket URLs directly in the line
437    /// text, covering both HTML comments and regular text as a safety net.
438    fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
439        let bytes = line.as_bytes();
440        let len = bytes.len();
441        let mut i = 0;
442        while i < len {
443            if bytes[i] == b'<' {
444                let after_open = i + 1;
445                // Check for a valid URI scheme per CommonMark autolink spec:
446                // scheme = [a-zA-Z][a-zA-Z0-9+.-]{0,31}
447                // followed by ':'
448                if after_open < len && bytes[after_open].is_ascii_alphabetic() {
449                    let mut s = after_open + 1;
450                    let scheme_max = (after_open + 32).min(len);
451                    while s < scheme_max
452                        && (bytes[s].is_ascii_alphanumeric()
453                            || bytes[s] == b'+'
454                            || bytes[s] == b'-'
455                            || bytes[s] == b'.')
456                    {
457                        s += 1;
458                    }
459                    if s < len && bytes[s] == b':' {
460                        // Valid scheme found; scan for closing '>' with no spaces or '<'
461                        let mut j = s + 1;
462                        let mut found_close = false;
463                        while j < len {
464                            match bytes[j] {
465                                b'>' => {
466                                    found_close = true;
467                                    break;
468                                }
469                                b' ' | b'<' => break,
470                                _ => j += 1,
471                            }
472                        }
473                        if found_close && pos >= i && pos <= j {
474                            return true;
475                        }
476                        if found_close {
477                            i = j + 1;
478                            continue;
479                        }
480                    }
481                }
482            }
483            i += 1;
484        }
485        false
486    }
487
488    // Check if a character is a word boundary (handles Unicode)
489    fn is_word_boundary_char(c: char) -> bool {
490        !c.is_alphanumeric()
491    }
492
493    // Check if position is at a word boundary using byte-level lookups.
494    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
495        if is_start {
496            if pos == 0 {
497                return true;
498            }
499            match content[..pos].chars().next_back() {
500                None => true,
501                Some(c) => Self::is_word_boundary_char(c),
502            }
503        } else {
504            if pos >= content.len() {
505                return true;
506            }
507            match content[pos..].chars().next() {
508                None => true,
509                Some(c) => Self::is_word_boundary_char(c),
510            }
511        }
512    }
513
514    /// For a frontmatter line, return the byte offset where the checkable
515    /// value portion starts. Returns `usize::MAX` if the entire line should be
516    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
517    fn frontmatter_value_offset(line: &str) -> usize {
518        let trimmed = line.trim();
519
520        // Skip frontmatter delimiters and empty lines
521        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
522            return usize::MAX;
523        }
524
525        // Skip YAML comments
526        if trimmed.starts_with('#') {
527            return usize::MAX;
528        }
529
530        // YAML list item: "  - item" or "  - key: value"
531        let stripped = line.trim_start();
532        if let Some(after_dash) = stripped.strip_prefix("- ") {
533            let leading = line.len() - stripped.len();
534            // Check if the list item contains a mapping (e.g., "- key: value")
535            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
536                return result;
537            }
538            // Bare list item value (no colon) - check content after "- "
539            return leading + 2;
540        }
541        if stripped == "-" {
542            return usize::MAX;
543        }
544
545        // Key-value pair with colon separator (YAML): "key: value"
546        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
547            return result;
548        }
549
550        // Key-value pair with equals separator (TOML): "key = value"
551        if let Some(eq_pos) = line.find('=') {
552            let after_eq = eq_pos + 1;
553            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
554                let value_start = after_eq + 1;
555                let value_slice = &line[value_start..];
556                let value_trimmed = value_slice.trim();
557                if value_trimmed.is_empty() {
558                    return usize::MAX;
559                }
560                // For quoted values, skip the opening quote character
561                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
562                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
563                {
564                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
565                    return value_start + quote_offset + 1;
566                }
567                return value_start;
568            }
569            // Equals with no space after or at end of line -> no value to check
570            return usize::MAX;
571        }
572
573        // No separator found - continuation line or bare value, check the whole line
574        0
575    }
576
577    /// Parse a key-value pair using colon separator within `content` that starts
578    /// at `base_offset` in the original line. Returns `Some(offset)` if a colon
579    /// separator is found, `None` if no colon is present.
580    fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
581        let colon_pos = content.find(':')?;
582        let abs_colon = base_offset + colon_pos;
583        let after_colon = abs_colon + 1;
584        if after_colon < line.len() && line.as_bytes()[after_colon] == b' ' {
585            let value_start = after_colon + 1;
586            let value_slice = &line[value_start..];
587            let value_trimmed = value_slice.trim();
588            if value_trimmed.is_empty() {
589                return Some(usize::MAX);
590            }
591            // Skip flow mappings and flow sequences - too complex for heuristic parsing
592            if value_trimmed.starts_with('{') || value_trimmed.starts_with('[') {
593                return Some(usize::MAX);
594            }
595            // For quoted values, skip the opening quote character
596            if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
597                || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
598            {
599                let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
600                return Some(value_start + quote_offset + 1);
601            }
602            return Some(value_start);
603        }
604        // Colon with no space after or at end of line -> no value to check
605        Some(usize::MAX)
606    }
607
608    // Get the proper name that should be used for a found name
609    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
610        let found_lower = found_name.to_lowercase();
611
612        // Iterate through the configured proper names
613        for name in &self.config.names {
614            let lower_name = name.to_lowercase();
615            let lower_name_no_dots = lower_name.replace('.', "");
616
617            // Direct match
618            if found_lower == lower_name || found_lower == lower_name_no_dots {
619                return Some(name.clone());
620            }
621
622            // Check ASCII-normalized version
623            let ascii_normalized = Self::ascii_normalize(&lower_name);
624
625            let ascii_no_dots = ascii_normalized.replace('.', "");
626
627            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
628                return Some(name.clone());
629            }
630        }
631        None
632    }
633}
634
635impl Rule for MD044ProperNames {
    /// Identifier of this rule: `"MD044"`.
    fn name(&self) -> &'static str {
        "MD044"
    }
639
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Proper names should have the correct capitalization"
    }
643
644    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
645        if self.config.names.is_empty() {
646            return true;
647        }
648        // Quick check if any configured name variants exist (case-insensitive)
649        let content_lower = if ctx.content.is_ascii() {
650            ctx.content.to_ascii_lowercase()
651        } else {
652            ctx.content.to_lowercase()
653        };
654        !self.name_variants.iter().any(|name| content_lower.contains(name))
655    }
656
657    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
658        let content = ctx.content;
659        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
660            return Ok(Vec::new());
661        }
662
663        // Compute lowercase content once and reuse across all checks
664        let content_lower = if content.is_ascii() {
665            content.to_ascii_lowercase()
666        } else {
667            content.to_lowercase()
668        };
669
670        // Early return: use pre-computed name_variants for the quick check
671        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
672
673        if !has_potential_matches {
674            return Ok(Vec::new());
675        }
676
677        let line_index = &ctx.line_index;
678        let violations = self.find_name_violations(content, ctx, &content_lower);
679
680        let warnings = violations
681            .into_iter()
682            .filter_map(|(line, column, found_name)| {
683                self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
684                    rule_name: Some(self.name().to_string()),
685                    line,
686                    column,
687                    end_line: line,
688                    end_column: column + found_name.len(),
689                    message: format!("Proper name '{found_name}' should be '{proper_name}'"),
690                    severity: Severity::Warning,
691                    fix: Some(Fix {
692                        range: line_index.line_col_to_byte_range(line, column),
693                        replacement: proper_name,
694                    }),
695                })
696            })
697            .collect();
698
699        Ok(warnings)
700    }
701
702    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
703        let content = ctx.content;
704        if content.is_empty() || self.config.names.is_empty() {
705            return Ok(content.to_string());
706        }
707
708        let content_lower = if content.is_ascii() {
709            content.to_ascii_lowercase()
710        } else {
711            content.to_lowercase()
712        };
713        let violations = self.find_name_violations(content, ctx, &content_lower);
714        if violations.is_empty() {
715            return Ok(content.to_string());
716        }
717
718        // Process lines and build the fixed content
719        let mut fixed_lines = Vec::new();
720
721        // Group violations by line
722        let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
723        for (line_num, col_num, found_name) in violations {
724            violations_by_line
725                .entry(line_num)
726                .or_default()
727                .push((col_num, found_name));
728        }
729
730        // Sort violations within each line in reverse order
731        for violations in violations_by_line.values_mut() {
732            violations.sort_by_key(|b| std::cmp::Reverse(b.0));
733        }
734
735        // Process each line
736        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
737            let line_num = line_idx + 1;
738
739            if let Some(line_violations) = violations_by_line.get(&line_num) {
740                // This line has violations, fix them
741                let mut fixed_line = line_info.content(ctx.content).to_string();
742
743                for (col_num, found_name) in line_violations {
744                    if let Some(proper_name) = self.get_proper_name_for(found_name) {
745                        let start_col = col_num - 1; // Convert to 0-based
746                        let end_col = start_col + found_name.len();
747
748                        if end_col <= fixed_line.len()
749                            && fixed_line.is_char_boundary(start_col)
750                            && fixed_line.is_char_boundary(end_col)
751                        {
752                            fixed_line.replace_range(start_col..end_col, &proper_name);
753                        }
754                    }
755                }
756
757                fixed_lines.push(fixed_line);
758            } else {
759                // No violations on this line, keep it as is
760                fixed_lines.push(line_info.content(ctx.content).to_string());
761            }
762        }
763
764        // Join lines with newlines, preserving the original ending
765        let mut result = fixed_lines.join("\n");
766        if content.ends_with('\n') && !result.ends_with('\n') {
767            result.push('\n');
768        }
769        Ok(result)
770    }
771
772    fn as_any(&self) -> &dyn std::any::Any {
773        self
774    }
775
776    fn default_config_section(&self) -> Option<(String, toml::Value)> {
777        let json_value = serde_json::to_value(&self.config).ok()?;
778        Some((
779            self.name().to_string(),
780            crate::rule_config_serde::json_to_toml_value(&json_value)?,
781        ))
782    }
783
784    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
785    where
786        Self: Sized,
787    {
788        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
789        Box::new(Self::from_config_struct(rule_config))
790    }
791}
792
793#[cfg(test)]
794mod tests {
795    use super::*;
796    use crate::lint_context::LintContext;
797
798    fn create_context(content: &str) -> LintContext<'_> {
799        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
800    }
801
/// Names that already use the configured capitalization produce no warnings.
#[test]
fn test_correctly_capitalized_names() {
    let names = vec![
        String::from("JavaScript"),
        String::from("TypeScript"),
        String::from("Node.js"),
    ];
    let rule = MD044ProperNames::new(names, true);

    let context = create_context("This document uses JavaScript, TypeScript, and Node.js correctly.");
    let warnings = rule.check(&context).unwrap();

    assert!(warnings.is_empty(), "Should not flag correctly capitalized names");
}
818
/// Lowercase variants of configured names are reported with their 1-based line and column.
#[test]
fn test_incorrectly_capitalized_names() {
    let names = vec![String::from("JavaScript"), String::from("TypeScript")];
    let rule = MD044ProperNames::new(names, true);

    let context = create_context("This document uses javascript and typescript incorrectly.");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 2, "Should flag two incorrect capitalizations");

    let (first, second) = (&warnings[0], &warnings[1]);
    assert_eq!(first.message, "Proper name 'javascript' should be 'JavaScript'");
    assert_eq!(first.line, 1);
    assert_eq!(first.column, 20);
    assert_eq!(second.message, "Proper name 'typescript' should be 'TypeScript'");
    assert_eq!(second.line, 1);
    assert_eq!(second.column, 35);
}
835
/// A miscapitalized name is flagged even when it is the first word of a sentence.
#[test]
fn test_names_at_beginning_of_sentences() {
    let names = vec![String::from("JavaScript"), String::from("Python")];
    let rule = MD044ProperNames::new(names, true);

    let context = create_context("javascript is a great language. python is also popular.");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 2, "Should flag names at beginning of sentences");

    // Compare (line, column) pairs in reported order.
    let positions: Vec<_> = warnings.iter().map(|w| (w.line, w.column)).collect();
    assert_eq!(positions, vec![(1, 1), (1, 33)]);
}
850
/// With code-block checking switched on (second constructor argument `true`),
/// occurrences inside a fenced block are flagged alongside those in regular text.
#[test]
fn test_names_in_code_blocks_checked_by_default() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    let markdown = r#"Here is some text with JavaScript.

```javascript
// This javascript should be checked
const lang = "javascript";
```

But this javascript should be flagged."#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 3, "Should flag javascript inside and outside code blocks");

    // Two hits inside the fence (lines 4 and 5) and one in the closing paragraph (line 8).
    let flagged_lines: Vec<_> = warnings.iter().map(|w| w.line).collect();
    assert_eq!(flagged_lines, vec![4, 5, 8]);
}
872
/// With `code_blocks = false`, a miscapitalized name inside a fenced block is not reported.
#[test]
fn test_names_in_code_blocks_ignored_when_disabled() {
    // code_blocks = false means skip code blocks
    let check_code_blocks = false;
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], check_code_blocks);

    let markdown = r#"```
javascript in code block
```"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(
        warnings.len(),
        0,
        "Should not flag javascript in code blocks when code_blocks is false"
    );
}
893
/// With `code_blocks = true`, a name inside an inline code span is flagged as well as
/// the same name in surrounding text.
#[test]
fn test_names_in_inline_code_checked_by_default() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    let context = create_context("This is `javascript` in inline code and javascript outside.");
    let warnings = rule.check(&context).unwrap();

    // When code_blocks=true, inline code should be checked
    assert_eq!(warnings.len(), 2, "Should flag javascript inside and outside inline code");

    let inside_span = &warnings[0];
    let outside_span = &warnings[1];
    assert_eq!(inside_span.column, 10); // javascript in inline code
    assert_eq!(outside_span.column, 41); // javascript outside
}
907
/// Several different miscapitalized names on one line are each reported, in order of appearance.
#[test]
fn test_multiple_names_in_same_line() {
    let names = vec![
        String::from("JavaScript"),
        String::from("TypeScript"),
        String::from("React"),
    ];
    let rule = MD044ProperNames::new(names, true);

    let context = create_context("I use javascript, typescript, and react in my projects.");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 3, "Should flag all three incorrect names");

    let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
    assert_eq!(messages[0], "Proper name 'javascript' should be 'JavaScript'");
    assert_eq!(messages[1], "Proper name 'typescript' should be 'TypeScript'");
    assert_eq!(messages[2], "Proper name 'react' should be 'React'");
}
924
/// Every wrong-case spelling is flagged; the exactly-matching spelling is left alone.
#[test]
fn test_case_sensitivity() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    let context = create_context("JAVASCRIPT, Javascript, javascript, and JavaScript variations.");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 3, "Should flag all incorrect case variations");

    // JavaScript (correct) should not be flagged
    for warning in &warnings {
        assert!(warning.message.contains("should be 'JavaScript'"));
    }
}
937
/// A rule built from an explicit `MD044Config` flags each of the configured custom names.
#[test]
fn test_configuration_with_custom_name_list() {
    let custom_names = vec![
        String::from("GitHub"),
        String::from("GitLab"),
        String::from("DevOps"),
    ];
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: custom_names,
        code_blocks: true,
        html_elements: true,
        html_comments: true,
    });

    let context = create_context("We use github, gitlab, and devops for our workflow.");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 3, "Should flag all custom names");

    let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
    assert_eq!(messages[0], "Proper name 'github' should be 'GitHub'");
    assert_eq!(messages[1], "Proper name 'gitlab' should be 'GitLab'");
    assert_eq!(messages[2], "Proper name 'devops' should be 'DevOps'");
}
957
/// With no names configured, the rule reports nothing.
#[test]
fn test_empty_configuration() {
    let rule = MD044ProperNames::new(Vec::new(), true);

    let context = create_context("This has javascript and typescript but no configured names.");
    let warnings = rule.check(&context).unwrap();

    assert!(warnings.is_empty(), "Should not flag anything with empty configuration");
}
968
/// Names containing `.` or `+` are matched, including the dotless spelling of `Node.js`.
#[test]
fn test_names_with_special_characters() {
    let names = vec![
        String::from("Node.js"),
        String::from("ASP.NET"),
        String::from("C++"),
    ];
    let rule = MD044ProperNames::new(names, true);

    let context = create_context("We use nodejs, asp.net, ASP.NET, and c++ in our stack.");
    let warnings = rule.check(&context).unwrap();

    // nodejs should match Node.js (dotless variation)
    // asp.net should be flagged (wrong case)
    // ASP.NET should not be flagged (correct)
    // c++ should be flagged
    assert_eq!(warnings.len(), 3, "Should handle special characters correctly");

    let has_message = |expected: &str| warnings.iter().any(|w| w.message.as_str() == expected);
    assert!(has_message("Proper name 'nodejs' should be 'Node.js'"));
    assert!(has_message("Proper name 'asp.net' should be 'ASP.NET'"));
    assert!(has_message("Proper name 'c++' should be 'C++'"));
}
991
/// `java` and `script` are flagged only as standalone words, not as parts of `JavaScript`.
#[test]
fn test_word_boundaries() {
    let names = vec![String::from("Java"), String::from("Script")];
    let rule = MD044ProperNames::new(names, true);

    let context = create_context("JavaScript is not java or script, but Java and Script are separate.");
    let warnings = rule.check(&context).unwrap();

    // Should only flag lowercase "java" and "script" as separate words
    assert_eq!(warnings.len(), 2, "Should respect word boundaries");

    let columns: Vec<_> = warnings.iter().map(|w| w.column).collect();
    assert!(columns.contains(&19)); // "java" position
    assert!(columns.contains(&27)); // "script" position
}
1005
/// `fix` rewrites every miscapitalized name on a line to its configured spelling.
#[test]
fn test_fix_method() {
    let names = vec![
        String::from("JavaScript"),
        String::from("TypeScript"),
        String::from("Node.js"),
    ];
    let rule = MD044ProperNames::new(names, true);

    let context = create_context("I love javascript, typescript, and nodejs!");
    let corrected = rule.fix(&context).unwrap();

    assert_eq!(corrected, "I love JavaScript, TypeScript, and Node.js!");
}
1023
/// `fix` corrects repeated occurrences of the same name, whatever their original casing.
#[test]
fn test_fix_multiple_occurrences() {
    let rule = MD044ProperNames::new(vec![String::from("Python")], true);

    let context = create_context("python is great. I use python daily. PYTHON is powerful.");
    let corrected = rule.fix(&context).unwrap();

    let expected = "Python is great. I use Python daily. Python is powerful.";
    assert_eq!(corrected, expected);
}
1034
/// With code-block checking switched on (second constructor argument `true`),
/// `fix` also corrects names that appear inside a fenced code block.
#[test]
fn test_fix_checks_code_blocks_by_default() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    let original = r#"I love javascript.

```
const lang = "javascript";
```

More javascript here."#;

    let expected = r#"I love JavaScript.

```
const lang = "JavaScript";
```

More JavaScript here."#;

    let context = create_context(original);
    let corrected = rule.fix(&context).unwrap();

    assert_eq!(corrected, expected);
}
1060
/// Warnings carry the correct line number when violations are spread over several lines.
#[test]
fn test_multiline_content() {
    let names = vec![String::from("Rust"), String::from("Python")];
    let rule = MD044ProperNames::new(names, true);

    let markdown = r#"First line with rust.
Second line with python.
Third line with RUST and PYTHON."#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 4, "Should flag all incorrect occurrences");

    let flagged_lines: Vec<_> = warnings.iter().map(|w| w.line).collect();
    assert_eq!(flagged_lines, vec![1, 2, 3, 3]);
}
1078
/// The default configuration has no names, skips code blocks, and checks HTML elements
/// and HTML comments.
#[test]
fn test_default_config() {
    let defaults = MD044Config::default();

    assert!(defaults.names.is_empty());
    assert!(!defaults.code_blocks);
    assert!(defaults.html_elements);
    assert!(defaults.html_comments);
}
1087
/// With only `names` overridden, a miscapitalized name inside an HTML comment is flagged.
#[test]
fn test_default_config_checks_html_comments() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        ..MD044Config::default()
    });

    let context = create_context("# Guide\n\n<!-- javascript mentioned here -->\n");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 1, "Default config should check HTML comments");

    let comment_warning = &warnings[0];
    assert_eq!(comment_warning.line, 3);
}
1103
/// With only `names` overridden, a miscapitalized name inside a fenced block is not flagged.
#[test]
fn test_default_config_skips_code_blocks() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        ..MD044Config::default()
    });

    let context = create_context("# Guide\n\n```\njavascript in code\n```\n");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 0, "Default config should skip code blocks");
}
1118
/// A name inside an HTML comment that sits on its own line is flagged under the default config.
#[test]
fn test_standalone_html_comment_checked() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Test")],
        ..MD044Config::default()
    });

    let context = create_context("# Heading\n\n<!-- this is a test example -->\n");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 1, "Should flag proper name in standalone HTML comment");

    let comment_warning = &warnings[0];
    assert_eq!(comment_warning.line, 3);
}
1134
/// Inline configuration comments (`rumdl-*` / `markdownlint-*`) are never flagged themselves,
/// even when they contain a configured name.
#[test]
fn test_inline_config_comments_not_flagged() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("RUMDL")],
        ..MD044Config::default()
    });

    // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
    // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
    // but would be suppressed by the linting engine's inline config filtering.
    let markdown = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 2, "Should only flag body lines, not config comments");

    let flagged_lines: Vec<_> = warnings.iter().map(|w| w.line).collect();
    assert_eq!(flagged_lines, vec![2, 5]);
}
1154
/// With `html_comments = false`, only the occurrence outside the HTML comment is flagged.
#[test]
fn test_html_comment_skipped_when_disabled() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Test")],
        code_blocks: true,
        html_elements: true,
        html_comments: false,
    });

    let markdown = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(
        warnings.len(),
        1,
        "Should only flag 'test' outside HTML comment when html_comments=false"
    );

    let body_warning = &warnings[0];
    assert_eq!(body_warning.line, 5);
}
1176
/// Under the default config, `fix` corrects a name that appears inside an HTML comment.
#[test]
fn test_fix_corrects_html_comment_content() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        ..MD044Config::default()
    });

    let context = create_context("# Guide\n\n<!-- javascript mentioned here -->\n");
    let corrected = rule.fix(&context).unwrap();

    let expected = "# Guide\n\n<!-- JavaScript mentioned here -->\n";
    assert_eq!(corrected, expected);
}
1191
/// `fix` leaves `rumdl-disable` / `rumdl-enable` comments untouched while correcting body text.
#[test]
fn test_fix_does_not_modify_inline_config_comments() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("RUMDL")],
        ..MD044Config::default()
    });

    let context = create_context("<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n");
    let corrected = rule.fix(&context).unwrap();

    // Config comments should be untouched; body text should be fixed
    let expected_fragments = ["<!-- rumdl-disable -->", "<!-- rumdl-enable -->", "Some RUMDL text."];
    for fragment in expected_fragments.iter() {
        assert!(corrected.contains(fragment));
    }
}
1209
/// Fifty configured names are handled correctly; three of them appear miscapitalized in the input.
#[test]
fn test_performance_with_many_names() {
    let names: Vec<String> = (0..50).map(|i| format!("ProperName{i}")).collect();
    let rule = MD044ProperNames::new(names, true);

    let context = create_context("This has propername0, propername25, and propername49 incorrectly.");
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 3, "Should handle many configured names efficiently");
}
1225
/// One thousand configured names still yield a combined pattern, and checking content
/// against it reports the two miscapitalized names in the input.
#[test]
fn test_large_name_count_performance() {
    // Verify MD044 can handle large numbers of names without regex limitations
    // This test confirms that fancy-regex handles large patterns well
    let mut names = Vec::with_capacity(1000);
    for i in 0..1000 {
        names.push(format!("ProperName{i}"));
    }

    let rule = MD044ProperNames::new(names, true);

    // The combined pattern should be created successfully
    assert!(rule.combined_pattern.is_some());

    // Should be able to check content without errors
    let context = create_context("This has propername0 and propername999 in it.");
    let warnings = rule.check(&context).unwrap();

    // Should detect both incorrect names
    assert_eq!(warnings.len(), 2, "Should handle 1000 names without issues");
}
1245
/// Checking the same context twice yields the same single warning at the same position.
#[test]
fn test_cache_behavior() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);
    let context = create_context("Using javascript here.");

    // First check
    let first_run = rule.check(&context).unwrap();
    assert_eq!(first_run.len(), 1);

    // Second check should use cache
    let second_run = rule.check(&context).unwrap();
    assert_eq!(second_run.len(), 1);

    // Results should be identical
    let (before, after) = (&first_run[0], &second_run[0]);
    assert_eq!(before.line, after.line);
    assert_eq!(before.column, after.column);
}
1265
/// With `html_comments = false`, a single-line HTML comment is skipped while the
/// surrounding lines are still checked.
#[test]
fn test_html_comments_not_checked_when_disabled() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        code_blocks: true,    // Check code blocks
        html_elements: true,  // Check HTML elements
        html_comments: false, // Don't check HTML comments
    });

    let markdown = r#"Regular javascript here.
<!-- This javascript in HTML comment should be ignored -->
More javascript outside."#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 2, "Should only flag javascript outside HTML comments");

    let flagged_lines: Vec<_> = warnings.iter().map(|w| w.line).collect();
    assert_eq!(flagged_lines, vec![1, 3]);
}
1287
/// With `html_comments = true`, the occurrence inside the HTML comment is flagged too.
#[test]
fn test_html_comments_checked_when_enabled() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        code_blocks: true,   // Check code blocks
        html_elements: true, // Check HTML elements
        html_comments: true, // Check HTML comments
    });

    let markdown = r#"Regular javascript here.
<!-- This javascript in HTML comment should be checked -->
More javascript outside."#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(
        warnings.len(),
        3,
        "Should flag all javascript occurrences including in HTML comments"
    );
}
1311
/// With `html_comments = false`, names on the interior lines of a multi-line HTML comment
/// are skipped; only the lines before and after the comment are flagged.
#[test]
fn test_multiline_html_comments() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Python"), String::from("JavaScript")],
        code_blocks: true,    // Check code blocks
        html_elements: true,  // Check HTML elements
        html_comments: false, // Don't check HTML comments
    });

    let markdown = r#"Regular python here.
<!--
This is a multiline comment
with javascript and python
that should be ignored
-->
More javascript outside."#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    assert_eq!(warnings.len(), 2, "Should only flag names outside HTML comments");

    let before_comment = &warnings[0];
    let after_comment = &warnings[1];
    assert_eq!(before_comment.line, 1); // python
    assert_eq!(after_comment.line, 7); // javascript
}
1337
/// With `html_comments = false`, `fix` corrects body text but leaves the HTML comment as written.
#[test]
fn test_fix_preserves_html_comments_when_disabled() {
    let rule = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("JavaScript")],
        code_blocks: true,    // Check code blocks
        html_elements: true,  // Check HTML elements
        html_comments: false, // Don't check HTML comments
    });

    let original = r#"javascript here.
<!-- javascript in comment -->
More javascript."#;

    let expected = r#"JavaScript here.
<!-- javascript in comment -->
More JavaScript."#;

    let context = create_context(original);
    let corrected = rule.fix(&context).unwrap();

    assert_eq!(
        corrected, expected,
        "Should not fix names inside HTML comments when disabled"
    );
}
1364
/// Link text (inline and reference style) is checked for proper names, while link
/// destinations and reference definitions are not.
#[test]
fn test_proper_names_in_link_text_are_flagged() {
    let names = vec![
        String::from("JavaScript"),
        String::from("Node.js"),
        String::from("Python"),
    ];
    let rule = MD044ProperNames::new(names, true);

    let markdown = r#"Check this [javascript documentation](https://javascript.info) for info.

Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).

Real javascript should be flagged.

Also see the [typescript guide][ts-ref] for more.

Real python should be flagged too.

[ts-ref]: https://typescript.org/handbook"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // Link text should be checked, URLs should not be checked
    // Line 1: [javascript documentation] - "javascript" should be flagged
    // Line 3: [node.js homepage] - "node.js" should be flagged (matches "Node.js")
    // Line 3: [python tutorial] - "python" should be flagged
    // Line 5: standalone javascript
    // Line 9: standalone python
    assert_eq!(warnings.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");

    // Verify line numbers for link text warnings
    let on_line_one: Vec<_> = warnings.iter().filter(|w| w.line == 1).collect();
    assert_eq!(on_line_one.len(), 1);
    let link_text_warning = on_line_one[0];
    assert!(
        link_text_warning
            .message
            .contains("'javascript' should be 'JavaScript'")
    );

    // node.js and python
    let on_line_three = warnings.iter().filter(|w| w.line == 3).count();
    assert_eq!(on_line_three, 2);

    // Standalone warnings
    let standalone_javascript = warnings
        .iter()
        .any(|w| w.line == 5 && w.message.contains("'javascript'"));
    assert!(standalone_javascript);
    let standalone_python = warnings
        .iter()
        .any(|w| w.line == 9 && w.message.contains("'python'"));
    assert!(standalone_python);
}
1411
/// A configured name that appears only in a link destination is not flagged.
#[test]
fn test_link_urls_not_flagged() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    // URL contains "javascript" but should NOT be flagged
    let markdown = r#"[Link Text](https://javascript.info/guide)"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // URL should not be checked
    assert!(warnings.is_empty(), "URLs should not be checked for proper names");
}
1425
/// Image alt text is checked for proper names, while the image URL and title are not.
///
/// Line 1 has "javascript" in the alt text, the URL and the title of one image; line 3
/// has one standalone occurrence. Exactly two warnings are expected.
#[test]
fn test_proper_names_in_image_alt_text_are_flagged() {
    let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);

    let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.

Real javascript should be flagged."#;

    let ctx = create_context(content);
    let result = rule.check(&ctx).unwrap();

    // Image alt text should be checked, URL and title should not be checked
    // Line 1: ![javascript logo] - "javascript" should be flagged
    // Line 3: standalone javascript
    assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");

    // `assert_eq!` (rather than `assert!(a == b)`) reports the actual line on failure.
    assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
    assert_eq!(result[0].line, 1); // "![javascript logo]"
    assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
    assert_eq!(result[1].line, 3); // "Real javascript should be flagged."
}
1446
/// A configured name that appears only in an image URL is not flagged.
#[test]
fn test_image_urls_not_flagged() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    // URL contains "javascript" but should NOT be flagged
    let markdown = r#"![Logo](https://javascript.info/logo.png)"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // Image URL should not be checked
    assert!(warnings.is_empty(), "Image URLs should not be checked for proper names");
}
1460
/// Reference-style link text is checked, but the matching reference definition line is not.
#[test]
fn test_reference_link_text_flagged_but_definition_not() {
    let names = vec![String::from("JavaScript"), String::from("TypeScript")];
    let rule = MD044ProperNames::new(names, true);

    let markdown = r#"Check the [javascript guide][js-ref] for details.

Real javascript should be flagged.

[js-ref]: https://javascript.info/typescript/guide"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // Link text should be checked, reference definitions should not
    // Line 1: [javascript guide] - should be flagged
    // Line 3: standalone javascript - should be flagged
    // Line 5: reference definition - should NOT be flagged
    assert_eq!(warnings.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");

    let in_link_text = warnings
        .iter()
        .any(|w| w.line == 1 && w.message.contains("'javascript'"));
    assert!(in_link_text);
    let standalone = warnings
        .iter()
        .any(|w| w.line == 3 && w.message.contains("'javascript'"));
    assert!(standalone);
}
1482
/// A lone reference definition whose URL contains a configured name produces no warnings.
#[test]
fn test_reference_definitions_not_flagged() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    // Reference definition should NOT be flagged
    let markdown = r#"[js-ref]: https://javascript.info/guide"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // Reference definition URLs should not be checked
    assert!(warnings.is_empty(), "Reference definitions should not be checked");
}
1496
/// Text inside `[[...]]` WikiLinks is checked; a correctly capitalized WikiLink is not flagged.
#[test]
fn test_wikilinks_text_is_flagged() {
    let rule = MD044ProperNames::new(vec![String::from("JavaScript")], true);

    // WikiLinks [[destination]] should have their text checked
    let markdown = r#"[[javascript]]

Regular javascript here.

[[JavaScript|display text]]"#;

    let context = create_context(markdown);
    let warnings = rule.check(&context).unwrap();

    // Line 1: [[javascript]] - should be flagged (WikiLink text)
    // Line 3: standalone javascript - should be flagged
    // Line 5: [[JavaScript|display text]] - correct capitalization, no flag
    assert_eq!(warnings.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");

    let in_wikilink = warnings
        .iter()
        .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"));
    assert!(in_wikilink);
    let standalone = warnings
        .iter()
        .any(|w| w.line == 3 && w.message.contains("'javascript'"));
    assert!(standalone);
}
1522
/// Link text that is itself a URL (`https://`, `http://` or `www.` prefix) is not flagged.
#[test]
fn test_url_link_text_not_flagged() {
    let rule = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Link text that is itself a URL should not be flagged
    let markdown = r#"[https://github.com/org/repo](https://github.com/org/repo)

[http://github.com/org/repo](http://github.com/org/repo)

[www.github.com/org/repo](https://www.github.com/org/repo)"#;

    let context = create_context(markdown);
    let result = rule.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text should not be flagged, got: {result:?}"
    );
}
1542
/// URL-like link text is still recognized as a URL when it starts with a space.
#[test]
fn test_url_link_text_with_leading_space_not_flagged() {
    let rule = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Leading/trailing whitespace in link text should be trimmed before URL check
    let markdown = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;

    let context = create_context(markdown);
    let result = rule.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text with leading space should not be flagged, got: {result:?}"
    );
}
1558
/// URL-like link text written with an uppercase scheme and host is not flagged.
#[test]
fn test_url_link_text_uppercase_scheme_not_flagged() {
    let rule = MD044ProperNames::new(vec![String::from("GitHub")], true);

    let markdown = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;

    let context = create_context(markdown);
    let result = rule.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
    );
}
1573
#[test]
fn test_non_url_link_text_still_flagged() {
    let checker = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // None of these link texts is expected to be treated as a URL:
    //   line 1 - host without a protocol
    //   line 3 - ordinary prose
    //   line 5 - protocol-relative address
    //   line 7 - ftp scheme
    let markdown = concat!(
        "[github.com/org/repo](https://github.com/org/repo)\n",
        "\n",
        "[Visit github](https://github.com/org/repo)\n",
        "\n",
        "[//github.com/org/repo](//github.com/org/repo)\n",
        "\n",
        "[ftp://github.com/org/repo](ftp://github.com/org/repo)",
    );

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
    for expected_line in [1, 3, 5, 7] {
        assert!(result.iter().any(|warning| warning.line == expected_line));
    }
}
1596
#[test]
fn test_url_link_text_fix_not_applied() {
    // Running the fixer over a link whose text is a URL must return the input unchanged.
    let original = "[https://github.com/org/repo](https://github.com/org/repo)\n";
    let checker = MD044ProperNames::new(vec![String::from("GitHub")], true);

    let context = create_context(original);
    let rewritten = checker.fix(&context).unwrap();

    assert_eq!(rewritten, original, "Fix should not modify URL-like link text");
}
1608
#[test]
fn test_mixed_url_and_regular_link_text() {
    let checker = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Lines 1 and 5 use a URL as link text; line 3 uses ordinary words.
    let markdown = concat!(
        "[https://github.com/org/repo](https://github.com/org/repo)\n",
        "\n",
        "Visit [github documentation](https://github.com/docs) for details.\n",
        "\n",
        "[www.github.com/pricing](https://www.github.com/pricing)",
    );

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    // Exactly one warning is expected, and it must sit on the prose link (line 3).
    assert_eq!(
        result.len(),
        1,
        "Only non-URL link text should be flagged, got: {result:?}"
    );
    let only_warning = &result[0];
    assert_eq!(only_warning.line, 3);
}
1631
#[test]
fn test_html_attribute_values_not_flagged() {
    // Everything on line 5 lives inside an `<img ...>` tag, so the test expects
    // no warnings there; the bare word on line 3 must still be reported.
    let markdown = concat!(
        "# Heading\n",
        "\n",
        "test\n",
        "\n",
        "<img src=\"www.example.test/test_image.png\">\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);
    let flavor = crate::config::MarkdownFlavor::Standard;
    let context = crate::lint_context::LintContext::new(markdown, flavor, None);
    let result = checker.check(&context).unwrap();

    // Warnings located on the HTML tag line.
    let line5_violations = result
        .iter()
        .filter(|warning| warning.line == 5)
        .collect::<Vec<_>>();
    assert!(
        line5_violations.is_empty(),
        "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
    );

    // Warnings located on the plain-text line.
    let plain_text_hits = result.iter().filter(|warning| warning.line == 3).count();
    assert_eq!(plain_text_hits, 1, "Plain 'test' on line 3 should be flagged");
}
1652
#[test]
fn test_html_text_content_still_flagged() {
    // The href value contains "example.test" (inside the tag), while the anchor
    // text "test link" sits between the tags. Only the latter should be reported.
    let markdown = concat!(
        "# Heading\n",
        "\n",
        "<a href=\"https://example.test/page\">test link</a>\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);
    let flavor = crate::config::MarkdownFlavor::Standard;
    let context = crate::lint_context::LintContext::new(markdown, flavor, None);
    let result = checker.check(&context).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should flag only 'test' in anchor text, not in href: {result:?}"
    );
    let anchor_warning = &result[0];
    assert_eq!(
        anchor_warning.column, 37,
        "Should flag col 37 ('test link' in anchor text)"
    );
}
1670
#[test]
fn test_html_attribute_various_not_flagged() {
    // Line 3 is an <img> with src/alt attributes; line 4 is a <span> with
    // class/data-* attributes followed by real text content between the tags.
    let markdown = "# Heading\n\n<img src=\"test.png\" alt=\"test image\">\n<span class=\"test-class\" data-test=\"value\">test content</span>\n";
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);
    let flavor = crate::config::MarkdownFlavor::Standard;
    let context = crate::lint_context::LintContext::new(markdown, flavor, None);
    let result = checker.check(&context).unwrap();

    // The single expected warning is the "test content" text on line 4.
    assert_eq!(
        result.len(),
        1,
        "Should flag only 'test content' between tags: {result:?}"
    );
    let only_warning = &result[0];
    assert_eq!(only_warning.line, 4);
}
1691
#[test]
fn test_plain_text_underscore_boundary_unchanged() {
    // Outside of HTML tags an underscore is expected to act as a word boundary,
    // so "test" is reported both at the start of "test_image" and at the end of
    // "just_test".
    let markdown = concat!(
        "# Heading\n",
        "\n",
        "test_image is here and just_test ends here\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);
    let flavor = crate::config::MarkdownFlavor::Standard;
    let context = crate::lint_context::LintContext::new(markdown, flavor, None);
    let result = checker.check(&context).unwrap();

    assert_eq!(
        result.len(),
        2,
        "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
    );

    // Collect the reported columns and check both expected positions are present.
    let cols = result
        .iter()
        .map(|warning| warning.column)
        .collect::<Vec<usize>>();
    assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
    assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
}
1712
#[test]
fn test_frontmatter_yaml_keys_not_flagged() {
    // Frontmatter line 3 has a lowercase key "test" and a correctly capitalized
    // value; the body (line 6) is also correctly capitalized. Nothing should be reported.
    let markdown = concat!(
        "---\n",
        "title: Heading\n",
        "test: Some Test value\n",
        "---\n",
        "\n",
        "Test\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML keys or correctly capitalized values: {result:?}"
    );
}
1731
#[test]
fn test_frontmatter_yaml_values_flagged() {
    // A wrongly capitalized name inside a YAML value must be reported.
    let markdown = concat!(
        "---\n",
        "title: Heading\n",
        "key: a test value\n",
        "---\n",
        "\n",
        "Test\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");

    // "key: a " occupies 7 characters, so the match starts at column 8 of line 3.
    let value_warning = &result[0];
    assert_eq!(value_warning.line, 3);
    assert_eq!(value_warning.column, 8);
}
1746
#[test]
fn test_frontmatter_key_matches_name_not_flagged() {
    // The only occurrence of the configured name is the YAML key itself.
    let markdown = concat!("---\n", "test: other value\n", "---\n", "\n", "Body text\n");
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML key that matches configured name: {result:?}"
    );
}
1761
#[test]
fn test_frontmatter_empty_value_not_flagged() {
    // Two keys without a value: one ends right at the colon, the other has a
    // trailing space after it.
    let markdown = concat!("---\n", "test:\n", "test: \n", "---\n", "\n", "Body text\n");
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML keys with empty values: {result:?}"
    );
}
1776
#[test]
fn test_frontmatter_nested_yaml_key_not_flagged() {
    // "test" appears only as an indented key beneath "parent:".
    let markdown = concat!(
        "---\n",
        "parent:\n",
        "  test: nested value\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
}
1789
#[test]
fn test_frontmatter_list_items_checked() {
    // Entries of a YAML block list count as values, so "- test" must be reported.
    let markdown = concat!(
        "---\n",
        "tags:\n",
        "  - test\n",
        "  - other\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
    let item_warning = &result[0];
    assert_eq!(item_warning.line, 3);
}
1803
#[test]
fn test_frontmatter_value_with_multiple_colons() {
    // With several colons on one line, only the text before the first colon is
    // the key; "description: a test thing" is all value and must be checked.
    let markdown = concat!(
        "---\n",
        "test: description: a test thing\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should flag 'test' in value after first colon: {result:?}"
    );

    // The warning must point past the "test: " key prefix.
    let value_warning = &result[0];
    assert_eq!(value_warning.line, 2);
    assert!(value_warning.column > 6, "Violation column should be in value portion");
}
1823
#[test]
fn test_frontmatter_does_not_affect_body() {
    // Having a frontmatter block must not stop the body from being checked.
    let markdown = concat!(
        "---\n",
        "title: Heading\n",
        "---\n",
        "\n",
        "test should be flagged here\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
    let body_warning = &result[0];
    assert_eq!(body_warning.line, 5);
}
1836
#[test]
fn test_frontmatter_fix_corrects_values_preserves_keys() {
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context("---\ntest: a test value\n---\n\ntest here\n");
    let fixed = checker.fix(&context).unwrap();

    // The key stays lowercase; the occurrence in the value and the one in the
    // body are both capitalized.
    let expected = "---\ntest: a Test value\n---\n\nTest here\n";
    assert_eq!(fixed, expected);
}
1849
#[test]
fn test_frontmatter_multiword_value_flagged() {
    // One YAML value containing two different misspelled names yields two warnings.
    let names = vec![String::from("JavaScript"), String::from("TypeScript")];
    let checker = MD044ProperNames::new(names, true);

    let markdown = concat!(
        "---\n",
        "description: Learn javascript and typescript\n",
        "---\n",
        "\n",
        "Body\n",
    );
    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
    assert!(result.iter().all(|warning| warning.line == 2));
}
1862
#[test]
fn test_frontmatter_yaml_comments_not_checked() {
    // The name only occurs on a `#` comment line inside the frontmatter.
    let markdown = concat!(
        "---\n",
        "# test comment\n",
        "title: Heading\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
}
1874
#[test]
fn test_frontmatter_delimiters_not_checked() {
    // A `---` delimited frontmatter with nothing to report, followed by a body
    // line that does contain the misspelled name.
    let markdown = concat!("---\n", "title: Heading\n", "---\n", "\n", "test here\n");
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    // The body occurrence on line 5 is the only expected warning.
    assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
    let body_warning = &result[0];
    assert_eq!(body_warning.line, 5);
}
1888
#[test]
fn test_frontmatter_continuation_lines_checked() {
    // A folded scalar (`>`) continues on indented lines without a colon; those
    // lines are value text and must be checked.
    let markdown = concat!(
        "---\n",
        "description: >\n",
        "  a test value\n",
        "  continued here\n",
        "---\n",
        "\n",
        "Body\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
    let continuation_warning = &result[0];
    assert_eq!(continuation_warning.line, 3);
}
1902
#[test]
fn test_frontmatter_quoted_values_checked() {
    // The text inside a double-quoted YAML value is still checked.
    let markdown = concat!("---\n", "title: \"a test title\"\n", "---\n", "\n", "Body\n");
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
    let quoted_warning = &result[0];
    assert_eq!(quoted_warning.line, 2);
}
1915
#[test]
fn test_frontmatter_single_quoted_values_checked() {
    // The text inside a single-quoted YAML value is still checked.
    let markdown = concat!("---\n", "title: 'a test title'\n", "---\n", "\n", "Body\n");
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should flag 'test' in single-quoted YAML value: {result:?}"
    );
    let quoted_warning = &result[0];
    assert_eq!(quoted_warning.line, 2);
}
1932
#[test]
fn test_frontmatter_fix_multiword_values() {
    // Both misspelled names inside the same frontmatter value get corrected.
    let names = vec![String::from("JavaScript"), String::from("TypeScript")];
    let checker = MD044ProperNames::new(names, true);

    let context = create_context("---\ndescription: Learn javascript and typescript\n---\n\nBody\n");
    let fixed = checker.fix(&context).unwrap();

    let expected = "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n";
    assert_eq!(fixed, expected);
}
1947
#[test]
fn test_frontmatter_fix_preserves_yaml_structure() {
    // Keys, list markers and indentation stay put; only the misspelled values
    // (list item, scalar value, body text) are rewritten.
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = concat!(
        "---\n",
        "tags:\n",
        "  - test\n",
        "  - other\n",
        "title: a test doc\n",
        "---\n",
        "\n",
        "test body\n",
    );
    let context = create_context(markdown);
    let fixed = checker.fix(&context).unwrap();

    let expected = concat!(
        "---\n",
        "tags:\n",
        "  - Test\n",
        "  - other\n",
        "title: a Test doc\n",
        "---\n",
        "\n",
        "Test body\n",
    );
    assert_eq!(fixed, expected);
}
1962
#[test]
fn test_frontmatter_toml_delimiters_not_checked() {
    // `+++` delimited (TOML) frontmatter: the quoted value on line 2 and the
    // body on line 5 each contain one misspelled name.
    let markdown = concat!(
        "+++\n",
        "title = \"a test title\"\n",
        "+++\n",
        "\n",
        "test body\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");

    // One warning for the TOML value ...
    let frontmatter_hits = result.iter().filter(|warning| warning.line == 2).count();
    assert_eq!(frontmatter_hits, 1, "Should flag 'test' in TOML value: {result:?}");

    // ... and one for the body text.
    let body_hits = result.iter().filter(|warning| warning.line == 5).count();
    assert_eq!(body_hits, 1, "Should flag body 'test': {result:?}");
}
1981
#[test]
fn test_frontmatter_toml_key_not_flagged() {
    // The configured name shows up only as a TOML key, never in a value.
    let markdown = concat!("+++\n", "test = \"other value\"\n", "+++\n", "\n", "Body text\n");
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag TOML key that matches configured name: {result:?}"
    );
}
1996
#[test]
fn test_frontmatter_toml_fix_preserves_keys() {
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context("+++\ntest = \"a test value\"\n+++\n\ntest here\n");
    let fixed = checker.fix(&context).unwrap();

    // The TOML key is left lowercase, while the value and the body are capitalized.
    let expected = "+++\ntest = \"a Test value\"\n+++\n\nTest here\n";
    assert_eq!(fixed, expected);
}
2009
#[test]
fn test_frontmatter_list_item_mapping_key_not_flagged() {
    // "- test: nested value" is a mapping inside a list item, which makes
    // "test" a key. Its value holds nothing to report.
    let markdown = concat!(
        "---\n",
        "items:\n",
        "  - test: nested value\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag YAML key in list-item mapping: {result:?}"
    );
}
2025
#[test]
fn test_frontmatter_list_item_mapping_value_flagged() {
    // For a list-item mapping such as "- key: a test value", the value part is checked.
    let markdown = concat!(
        "---\n",
        "items:\n",
        "  - key: a test value\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(
        result.len(),
        1,
        "Should flag 'test' in list-item mapping value: {result:?}"
    );
    let value_warning = &result[0];
    assert_eq!(value_warning.line, 3);
}
2042
#[test]
fn test_frontmatter_bare_list_item_still_flagged() {
    // A list item with no colon ("- test") is a plain value and must be reported.
    let markdown = concat!(
        "---\n",
        "tags:\n",
        "  - test\n",
        "  - other\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
    let item_warning = &result[0];
    assert_eq!(item_warning.line, 3);
}
2055
#[test]
fn test_frontmatter_flow_mapping_not_flagged() {
    // An inline `{key: value, ...}` mapping mixes keys and values on one line;
    // the test expects the whole construct to be left alone.
    let markdown = concat!(
        "---\n",
        "flow_map: {test: value, other: test}\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag names inside flow mappings: {result:?}"
    );
}
2071
#[test]
fn test_frontmatter_flow_sequence_not_flagged() {
    // An inline `[a, b, c]` sequence is expected to be left alone as well.
    let markdown = concat!(
        "---\n",
        "items: [test, other, test]\n",
        "---\n",
        "\n",
        "Body text\n",
    );
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag names inside flow sequences: {result:?}"
    );
}
2086
#[test]
fn test_frontmatter_list_item_mapping_fix_preserves_key() {
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context("---\nitems:\n  - test: a test value\n---\n\ntest here\n");
    let fixed = checker.fix(&context).unwrap();

    // The list-item key keeps its lowercase spelling; the occurrence in its
    // value and the one in the body are capitalized.
    let expected = "---\nitems:\n  - test: a Test value\n---\n\nTest here\n";
    assert_eq!(fixed, expected);
}
2100
2101    // --- Angle-bracket URL tests (issue #457) ---
2102
#[test]
fn test_angle_bracket_url_in_html_comment_not_flagged() {
    let checker = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Test")],
        ..Default::default()
    });

    // Line 5: angle-bracket URL in regular markdown.
    // Line 7: HTML comment with correctly capitalized prose and a bare URL
    //         (this test makes no assertion about line 7).
    // Line 8: HTML comment with correctly capitalized prose and an
    //         angle-bracket URL whose domain contains "test".
    let markdown = concat!(
        "---\n",
        "title: Level 1 heading\n",
        "---\n",
        "\n",
        "<https://www.example.test>\n",
        "\n",
        "<!-- This is a Test https://www.example.test -->\n",
        "<!-- This is a Test <https://www.example.test> -->\n",
    );
    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    // What matters here: the angle-bracket URL on line 8 must not yield a warning.
    let line8_warnings = result
        .iter()
        .filter(|warning| warning.line == 8)
        .collect::<Vec<_>>();
    assert!(
        line8_warnings.is_empty(),
        "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
    );
}
2129
#[test]
fn test_bare_url_in_html_comment_still_flagged() {
    let checker = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Test")],
        ..Default::default()
    });

    // The comment contains lowercase "test" as prose and again inside a bare
    // (non-bracketed) URL. The test only requires that something is reported.
    let context = create_context("<!-- This is a test https://www.example.test -->\n");
    let result = checker.check(&context).unwrap();

    assert!(
        !result.is_empty(),
        "Should flag 'test' in prose text of HTML comment with bare URL"
    );
}
2150
#[test]
fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
    // An autolink in ordinary markdown (outside any HTML comment) whose domain
    // contains the lowercase name must not be reported.
    let checker = MD044ProperNames::new(vec![String::from("Test")], true);

    let context = create_context("<https://www.example.test>\n");
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
    );
}
2166
#[test]
fn test_multiple_angle_bracket_urls_in_one_comment() {
    let checker = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Test")],
        ..Default::default()
    });

    // Two angle-bracket URLs in one HTML comment, each containing "test".
    let markdown = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
    let context = create_context(markdown);
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag names inside multiple angle-bracket URLs: {result:?}"
    );
}
2185
2186    #[test]
2187    fn test_angle_bracket_non_url_still_flagged() {
2188        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2189        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2190        assert!(
2191            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2192            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2193        );
2194    }
2195
#[test]
fn test_angle_bracket_mailto_url_not_flagged() {
    let checker = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Test")],
        ..Default::default()
    });

    // The name appears only inside a `<mailto:...>` autolink within an HTML comment.
    let context = create_context("<!-- Contact <mailto:test@example.com> for help -->\n");
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag names inside angle-bracket mailto URLs: {result:?}"
    );
}
2213
#[test]
fn test_angle_bracket_ftp_url_not_flagged() {
    let checker = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Test")],
        ..Default::default()
    });

    // The name appears only inside a `<ftp://...>` autolink within an HTML comment.
    let context = create_context("<!-- Download from <ftp://test.example.com/file> -->\n");
    let result = checker.check(&context).unwrap();

    assert!(
        result.is_empty(),
        "Should not flag names inside angle-bracket FTP URLs: {result:?}"
    );
}
2231
#[test]
fn test_angle_bracket_url_fix_preserves_url() {
    let checker = MD044ProperNames::from_config_struct(MD044Config {
        names: vec![String::from("Test")],
        ..Default::default()
    });

    // The comment has a misspelled name in its prose and a URL in angle brackets.
    let context = create_context("<!-- test text <https://www.example.test> -->\n");
    let fixed = checker.fix(&context).unwrap();

    // The URL must come through untouched ...
    let url_kept = fixed.contains("<https://www.example.test>");
    assert!(url_kept, "Fix should preserve angle-bracket URLs: {fixed}");

    // ... while the prose occurrence is capitalized.
    let prose_fixed = fixed.contains("Test text");
    assert!(prose_fixed, "Fix should correct prose 'test' to 'Test': {fixed}");
}
2255
#[test]
fn test_is_in_angle_bracket_url_helper() {
    // Exercises the helper directly, without going through the rule.
    let line = "text <https://example.test> more text";

    // Positions covered by the URL: '<' (5), 'h' (6), mid-URL (15), '>' (26).
    for inside in [5, 6, 15, 26] {
        assert!(MD044ProperNames::is_in_angle_bracket_url(line, inside));
    }

    // Positions outside of it: first character (0), the space before '<' (4),
    // and the space after '>' (27).
    for outside in [0, 4, 27] {
        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, outside));
    }

    // Angle brackets that do not wrap a URL.
    let non_url = "<notaurl>";
    assert!(!MD044ProperNames::is_in_angle_bracket_url(non_url, 1));

    // mailto scheme
    let mailto = "<mailto:test@example.com>";
    assert!(MD044ProperNames::is_in_angle_bracket_url(mailto, 10));

    // ftp scheme
    let ftp = "<ftp://test.example.com>";
    assert!(MD044ProperNames::is_in_angle_bracket_url(ftp, 10));
}
2287
#[test]
fn test_is_in_angle_bracket_url_uppercase_scheme() {
    // URI schemes are case-insensitive (RFC 3986), so fully upper-case and
    // mixed-case spellings must both be recognized.
    for line in ["<HTTPS://test.example.com>", "<Http://test.example.com>"] {
        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 10));
    }
}
2300
2301    #[test]
2302    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2303        // ssh scheme
2304        assert!(MD044ProperNames::is_in_angle_bracket_url(
2305            "<ssh://test@example.com>",
2306            10
2307        ));
2308        // file scheme
2309        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2310        // data scheme (no authority, just colon)
2311        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2312    }
2313
2314    #[test]
2315    fn test_is_in_angle_bracket_url_unclosed() {
2316        // Unclosed angle bracket should NOT match
2317        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2318            "<https://test.example.com",
2319            10
2320        ));
2321    }
2322
2323    #[test]
2324    fn test_vale_inline_config_comments_not_flagged() {
2325        let config = MD044Config {
2326            names: vec!["Vale".to_string(), "JavaScript".to_string()],
2327            ..MD044Config::default()
2328        };
2329        let rule = MD044ProperNames::from_config_struct(config);
2330
2331        let content = "\
2332<!-- vale off -->
2333Some javascript text here.
2334<!-- vale on -->
2335<!-- vale Style.Rule = NO -->
2336More javascript text.
2337<!-- vale Style.Rule = YES -->
2338<!-- vale JavaScript.Grammar = NO -->
2339";
2340        let ctx = create_context(content);
2341        let result = rule.check(&ctx).unwrap();
2342
2343        // Only the body text lines (2, 5) should be flagged for "javascript"
2344        assert_eq!(result.len(), 2, "Should only flag body lines, not Vale config comments");
2345        assert_eq!(result[0].line, 2);
2346        assert_eq!(result[1].line, 5);
2347    }
2348
2349    #[test]
2350    fn test_remark_lint_inline_config_comments_not_flagged() {
2351        let config = MD044Config {
2352            names: vec!["JavaScript".to_string()],
2353            ..MD044Config::default()
2354        };
2355        let rule = MD044ProperNames::from_config_struct(config);
2356
2357        let content = "\
2358<!-- lint disable remark-lint-some-rule -->
2359Some javascript text here.
2360<!-- lint enable remark-lint-some-rule -->
2361<!-- lint ignore remark-lint-some-rule -->
2362More javascript text.
2363";
2364        let ctx = create_context(content);
2365        let result = rule.check(&ctx).unwrap();
2366
2367        assert_eq!(
2368            result.len(),
2369            2,
2370            "Should only flag body lines, not remark-lint config comments"
2371        );
2372        assert_eq!(result[0].line, 2);
2373        assert_eq!(result[1].line, 5);
2374    }
2375
2376    #[test]
2377    fn test_fix_does_not_modify_vale_remark_lint_comments() {
2378        let config = MD044Config {
2379            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2380            ..MD044Config::default()
2381        };
2382        let rule = MD044ProperNames::from_config_struct(config);
2383
2384        let content = "\
2385<!-- vale off -->
2386Some javascript text.
2387<!-- vale on -->
2388<!-- lint disable remark-lint-some-rule -->
2389More javascript text.
2390<!-- lint enable remark-lint-some-rule -->
2391";
2392        let ctx = create_context(content);
2393        let fixed = rule.fix(&ctx).unwrap();
2394
2395        // Config directive lines must be preserved unchanged
2396        assert!(fixed.contains("<!-- vale off -->"));
2397        assert!(fixed.contains("<!-- vale on -->"));
2398        assert!(fixed.contains("<!-- lint disable remark-lint-some-rule -->"));
2399        assert!(fixed.contains("<!-- lint enable remark-lint-some-rule -->"));
2400        // Body text should be fixed
2401        assert!(fixed.contains("Some JavaScript text."));
2402        assert!(fixed.contains("More JavaScript text."));
2403    }
2404
2405    #[test]
2406    fn test_mixed_tool_directives_all_skipped() {
2407        let config = MD044Config {
2408            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2409            ..MD044Config::default()
2410        };
2411        let rule = MD044ProperNames::from_config_struct(config);
2412
2413        let content = "\
2414<!-- rumdl-disable MD044 -->
2415Some javascript text.
2416<!-- markdownlint-disable -->
2417More javascript text.
2418<!-- vale off -->
2419Even more javascript text.
2420<!-- lint disable some-rule -->
2421Final javascript text.
2422<!-- rumdl-enable MD044 -->
2423<!-- markdownlint-enable -->
2424<!-- vale on -->
2425<!-- lint enable some-rule -->
2426";
2427        let ctx = create_context(content);
2428        let result = rule.check(&ctx).unwrap();
2429
2430        // Only body text lines should be flagged (lines 2, 4, 6, 8)
2431        assert_eq!(
2432            result.len(),
2433            4,
2434            "Should only flag body lines, not any tool directive comments"
2435        );
2436        assert_eq!(result[0].line, 2);
2437        assert_eq!(result[1].line, 4);
2438        assert_eq!(result[2].line, 6);
2439        assert_eq!(result[3].line, 8);
2440    }
2441
2442    #[test]
2443    fn test_vale_remark_lint_edge_cases_not_matched() {
2444        let config = MD044Config {
2445            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2446            ..MD044Config::default()
2447        };
2448        let rule = MD044ProperNames::from_config_struct(config);
2449
2450        // These are regular HTML comments, NOT tool directives:
2451        // - "<!-- vale -->" is not a valid Vale directive (no action keyword)
2452        // - "<!-- vale is a tool -->" starts with "vale" but is prose, not a directive
2453        // - "<!-- valedictorian javascript -->" does not start with "<!-- vale "
2454        // - "<!-- linting javascript tips -->" does not start with "<!-- lint "
2455        // - "<!-- vale javascript -->" starts with "vale" but has no action keyword
2456        // - "<!-- lint your javascript code -->" starts with "lint" but has no action keyword
2457        let content = "\
2458<!-- vale -->
2459<!-- vale is a tool for writing -->
2460<!-- valedictorian javascript -->
2461<!-- linting javascript tips -->
2462<!-- vale javascript -->
2463<!-- lint your javascript code -->
2464";
2465        let ctx = create_context(content);
2466        let result = rule.check(&ctx).unwrap();
2467
2468        // Line 1: "<!-- vale -->" contains "vale" (wrong case for "Vale") -> flagged
2469        // Line 2: "<!-- vale is a tool for writing -->" contains "vale" -> flagged
2470        // Line 3: "<!-- valedictorian javascript -->" contains "javascript" -> flagged
2471        // Line 4: "<!-- linting javascript tips -->" contains "javascript" -> flagged
2472        // Line 5: "<!-- vale javascript -->" contains "vale" and "javascript" -> flagged for both
2473        // Line 6: "<!-- lint your javascript code -->" contains "javascript" -> flagged
2474        assert_eq!(
2475            result.len(),
2476            7,
2477            "Should flag proper names in non-directive HTML comments: got {result:?}"
2478        );
2479        assert_eq!(result[0].line, 1); // "vale" in <!-- vale -->
2480        assert_eq!(result[1].line, 2); // "vale" in <!-- vale is a tool -->
2481        assert_eq!(result[2].line, 3); // "javascript" in <!-- valedictorian javascript -->
2482        assert_eq!(result[3].line, 4); // "javascript" in <!-- linting javascript tips -->
2483        assert_eq!(result[4].line, 5); // "vale" in <!-- vale javascript -->
2484        assert_eq!(result[5].line, 5); // "javascript" in <!-- vale javascript -->
2485        assert_eq!(result[6].line, 6); // "javascript" in <!-- lint your javascript code -->
2486    }
2487
2488    #[test]
2489    fn test_vale_style_directives_skipped() {
2490        let config = MD044Config {
2491            names: vec!["JavaScript".to_string(), "Vale".to_string()],
2492            ..MD044Config::default()
2493        };
2494        let rule = MD044ProperNames::from_config_struct(config);
2495
2496        // These ARE valid Vale directives and should be skipped:
2497        let content = "\
2498<!-- vale style = MyStyle -->
2499<!-- vale styles = Style1, Style2 -->
2500<!-- vale MyRule.Name = YES -->
2501<!-- vale MyRule.Name = NO -->
2502Some javascript text.
2503";
2504        let ctx = create_context(content);
2505        let result = rule.check(&ctx).unwrap();
2506
2507        // Only line 5 (body text) should be flagged
2508        assert_eq!(
2509            result.len(),
2510            1,
2511            "Should only flag body lines, not Vale style/rule directives: got {result:?}"
2512        );
2513        assert_eq!(result[0].line, 5);
2514    }
2515}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs