rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_fancy_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
11type WarningPosition = (usize, usize, String); // (line, column, found_name)
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
67/// Check if a trimmed line is an inline config comment (rumdl or markdownlint directives).
68fn is_inline_config_comment(trimmed: &str) -> bool {
69    trimmed.starts_with("<!-- rumdl-") || trimmed.starts_with("<!-- markdownlint-")
70}
71
72#[derive(Clone)]
73pub struct MD044ProperNames {
74    config: MD044Config,
75    // Cache the combined regex pattern string
76    combined_pattern: Option<String>,
77    // Precomputed lowercase name variants for fast pre-checks
78    name_variants: Vec<String>,
79    // Cache for name violations by content hash
80    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
81}
82
83impl MD044ProperNames {
84    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
85        let config = MD044Config {
86            names,
87            code_blocks,
88            html_elements: true, // Default to checking HTML elements
89            html_comments: true, // Default to checking HTML comments
90        };
91        let combined_pattern = Self::create_combined_pattern(&config);
92        let name_variants = Self::build_name_variants(&config);
93        Self {
94            config,
95            combined_pattern,
96            name_variants,
97            content_cache: Arc::new(Mutex::new(HashMap::new())),
98        }
99    }
100
101    // Helper function for consistent ASCII normalization
102    fn ascii_normalize(s: &str) -> String {
103        s.replace(['é', 'è', 'ê', 'ë'], "e")
104            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
105            .replace(['ï', 'î', 'í', 'ì'], "i")
106            .replace(['ü', 'ú', 'ù', 'û'], "u")
107            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
108            .replace('ñ', "n")
109            .replace('ç', "c")
110    }
111
112    pub fn from_config_struct(config: MD044Config) -> Self {
113        let combined_pattern = Self::create_combined_pattern(&config);
114        let name_variants = Self::build_name_variants(&config);
115        Self {
116            config,
117            combined_pattern,
118            name_variants,
119            content_cache: Arc::new(Mutex::new(HashMap::new())),
120        }
121    }
122
123    // Create a combined regex pattern for all proper names
124    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
125        if config.names.is_empty() {
126            return None;
127        }
128
129        // Create patterns for all names and their variations
130        let mut patterns: Vec<String> = config
131            .names
132            .iter()
133            .flat_map(|name| {
134                let mut variations = vec![];
135                let lower_name = name.to_lowercase();
136
137                // Add the lowercase version
138                variations.push(escape_regex(&lower_name));
139
140                // Add version without dots
141                let lower_name_no_dots = lower_name.replace('.', "");
142                if lower_name != lower_name_no_dots {
143                    variations.push(escape_regex(&lower_name_no_dots));
144                }
145
146                // Add ASCII-normalized versions for common accented characters
147                let ascii_normalized = Self::ascii_normalize(&lower_name);
148
149                if ascii_normalized != lower_name {
150                    variations.push(escape_regex(&ascii_normalized));
151
152                    // Also add version without dots
153                    let ascii_no_dots = ascii_normalized.replace('.', "");
154                    if ascii_normalized != ascii_no_dots {
155                        variations.push(escape_regex(&ascii_no_dots));
156                    }
157                }
158
159                variations
160            })
161            .collect();
162
163        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
164        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
165
166        // Combine all patterns into a single regex with capture groups
167        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
168        Some(format!(r"(?i)({})", patterns.join("|")))
169    }
170
171    fn build_name_variants(config: &MD044Config) -> Vec<String> {
172        let mut variants = HashSet::new();
173        for name in &config.names {
174            let lower_name = name.to_lowercase();
175            variants.insert(lower_name.clone());
176
177            let lower_no_dots = lower_name.replace('.', "");
178            if lower_name != lower_no_dots {
179                variants.insert(lower_no_dots);
180            }
181
182            let ascii_normalized = Self::ascii_normalize(&lower_name);
183            if ascii_normalized != lower_name {
184                variants.insert(ascii_normalized.clone());
185
186                let ascii_no_dots = ascii_normalized.replace('.', "");
187                if ascii_normalized != ascii_no_dots {
188                    variants.insert(ascii_no_dots);
189                }
190            }
191        }
192
193        variants.into_iter().collect()
194    }
195
    /// Find all name violations in the content and return their positions.
    ///
    /// Each entry is `(line, column, found_name)`: `line` is 1-based, `column` is
    /// the 1-based *byte* offset of the match within its line (`cap.start() + 1`),
    /// and `found_name` is the text exactly as it appears in the document.
    ///
    /// `content_lower` is the pre-computed lowercase version of `content` to avoid
    /// redundant allocations. `content` itself is only used for the emptiness check
    /// and as the cache key; the lines that are scanned come from `ctx`.
    ///
    /// Results are memoized in `content_cache` keyed by `fast_hash(content)`.
    /// NOTE(review): the key covers the content only, not `ctx`; no eviction is
    /// visible in this function, so the cache grows with each distinct content.
    fn find_name_violations(
        &self,
        content: &str,
        ctx: &crate::lint_context::LintContext,
        content_lower: &str,
    ) -> Vec<WarningPosition> {
        // Early return: if no names configured or content is empty
        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
            return Vec::new();
        }

        // Early return: quick check if any of the configured names might be in content
        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));

        if !has_potential_matches {
            return Vec::new();
        }

        // Check if we have cached results
        let hash = fast_hash(content);
        {
            // Use a separate scope for borrowing to minimize lock time.
            // A poisoned mutex is treated as a cache miss.
            if let Ok(cache) = self.content_cache.lock()
                && let Some(cached) = cache.get(&hash)
            {
                return cached.clone();
            }
        }

        let mut violations = Vec::new();

        // Get the regex from global cache; a pattern that fails to compile yields no violations
        let combined_regex = match &self.combined_pattern {
            Some(pattern) => match get_cached_fancy_regex(pattern) {
                Ok(regex) => regex,
                Err(_) => return Vec::new(),
            },
            None => return Vec::new(),
        };

        // Use ctx.lines for better performance
        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
            let line_num = line_idx + 1;
            let line = line_info.content(ctx.content);

            // Skip code fence lines (```language or ~~~language)
            let trimmed = line.trim_start();
            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
                continue;
            }

            // Skip if in code block (when code_blocks = false)
            if !self.config.code_blocks && line_info.in_code_block {
                continue;
            }

            // Skip if in HTML block (when html_elements = false)
            if !self.config.html_elements && line_info.in_html_block {
                continue;
            }

            // Skip HTML comments using pre-computed line flag
            if !self.config.html_comments && line_info.in_html_comment {
                continue;
            }

            // Skip JSX expressions and MDX comments (MDX flavor)
            if line_info.in_jsx_expression || line_info.in_mdx_comment {
                continue;
            }

            // Skip Obsidian comments (Obsidian flavor)
            if line_info.in_obsidian_comment {
                continue;
            }

            // For frontmatter lines, determine offset where checkable value content starts.
            // YAML keys should not be checked against proper names - only values.
            // `usize::MAX` is the sentinel for "skip the whole line".
            let fm_value_offset = if line_info.in_front_matter {
                Self::frontmatter_value_offset(line)
            } else {
                0
            };
            if fm_value_offset == usize::MAX {
                continue;
            }

            // Skip inline config comments (rumdl-disable, markdownlint-enable, etc.)
            if is_inline_config_comment(trimmed) {
                continue;
            }

            // Cheap per-line pre-check: skip lines that don't contain any potential matches
            let line_lower = line.to_lowercase();
            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));

            if !has_line_matches {
                continue;
            }

            // Use the combined regex to find all matches in one pass
            for cap_result in combined_regex.find_iter(line) {
                match cap_result {
                    Ok(cap) => {
                        let found_name = &line[cap.start()..cap.end()];

                        // Check word boundaries manually for Unicode support
                        let start_pos = cap.start();
                        let end_pos = cap.end();

                        // Skip matches in the key portion of frontmatter lines
                        if start_pos < fm_value_offset {
                            continue;
                        }

                        // Skip matches inside HTML tag attributes (handles multi-line tags)
                        // `byte_pos` is the match start expressed as a document-wide byte offset.
                        let byte_pos = line_info.byte_offset + start_pos;
                        if ctx.is_in_html_tag(byte_pos) {
                            continue;
                        }

                        if !Self::is_at_word_boundary(line, start_pos, true)
                            || !Self::is_at_word_boundary(line, end_pos, false)
                        {
                            continue; // Not at word boundary
                        }

                        // Skip if in inline code when code_blocks is false
                        if !self.config.code_blocks && ctx.is_in_code_block_or_span(byte_pos) {
                            continue;
                        }

                        // Skip if in link URL or reference definition
                        if Self::is_in_link(ctx, byte_pos) {
                            continue;
                        }

                        // Skip if inside an angle-bracket URL (e.g., <https://...>)
                        // The link parser skips autolinks inside HTML comments,
                        // so we detect them directly in the line text.
                        if Self::is_in_angle_bracket_url(line, start_pos) {
                            continue;
                        }

                        // Find which proper name this matches
                        if let Some(proper_name) = self.get_proper_name_for(found_name) {
                            // Only flag if it's not already correct
                            if found_name != proper_name {
                                violations.push((line_num, cap.start() + 1, found_name.to_string()));
                            }
                        }
                    }
                    Err(e) => {
                        // Report the regex failure and keep scanning the remaining matches
                        eprintln!("Regex execution error on line {line_num}: {e}");
                    }
                }
            }
        }

        // Store in cache (ignore if mutex is poisoned)
        if let Ok(mut cache) = self.content_cache.lock() {
            cache.insert(hash, violations.clone());
        }
        violations
    }
363
364    /// Check if a byte position is within a link URL (not link text)
365    ///
366    /// Link text should be checked for proper names, but URLs should be skipped.
367    /// For `[text](url)` - check text, skip url
368    /// For `[text][ref]` - check text, skip reference portion
369    /// For `[[text]]` (WikiLinks) - check text, skip brackets
370    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
371        use pulldown_cmark::LinkType;
372
373        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
374        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
375        if link_idx > 0 {
376            let link = &ctx.links[link_idx - 1];
377            if byte_pos < link.byte_end {
378                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
379                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
380                    link.byte_offset + 2
381                } else {
382                    link.byte_offset + 1
383                };
384                let text_end = text_start + link.text.len();
385
386                // If position is within the text portion, skip only if text is a URL
387                if byte_pos >= text_start && byte_pos < text_end {
388                    return Self::link_text_is_url(&link.text);
389                }
390                // Position is in the URL/reference portion, skip it
391                return true;
392            }
393        }
394
395        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
396        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
397        if image_idx > 0 {
398            let image = &ctx.images[image_idx - 1];
399            if byte_pos < image.byte_end {
400                // Image starts with '![' so alt text starts at byte_offset + 2
401                let alt_start = image.byte_offset + 2;
402                let alt_end = alt_start + image.alt_text.len();
403
404                // If position is within the alt text portion, don't skip
405                if byte_pos >= alt_start && byte_pos < alt_end {
406                    return false;
407                }
408                // Position is in the URL/reference portion, skip it
409                return true;
410            }
411        }
412
413        // Check pre-computed reference definitions
414        ctx.is_in_reference_def(byte_pos)
415    }
416
417    /// Check if link text is a URL that should not have proper name corrections.
418    /// Matches markdownlint behavior: skip text starting with `http://`, `https://`, or `www.`.
419    fn link_text_is_url(text: &str) -> bool {
420        let lower = text.trim().to_ascii_lowercase();
421        lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
422    }
423
424    /// Check if a position within a line falls inside an angle-bracket URL (`<scheme://...>`).
425    ///
426    /// The link parser skips autolinks inside HTML comments, so `ctx.links` won't
427    /// contain them. This function detects angle-bracket URLs directly in the line
428    /// text, covering both HTML comments and regular text as a safety net.
429    fn is_in_angle_bracket_url(line: &str, pos: usize) -> bool {
430        let bytes = line.as_bytes();
431        let len = bytes.len();
432        let mut i = 0;
433        while i < len {
434            if bytes[i] == b'<' {
435                let after_open = i + 1;
436                // Check for a valid URI scheme per CommonMark autolink spec:
437                // scheme = [a-zA-Z][a-zA-Z0-9+.-]{0,31}
438                // followed by ':'
439                if after_open < len && bytes[after_open].is_ascii_alphabetic() {
440                    let mut s = after_open + 1;
441                    let scheme_max = (after_open + 32).min(len);
442                    while s < scheme_max
443                        && (bytes[s].is_ascii_alphanumeric()
444                            || bytes[s] == b'+'
445                            || bytes[s] == b'-'
446                            || bytes[s] == b'.')
447                    {
448                        s += 1;
449                    }
450                    if s < len && bytes[s] == b':' {
451                        // Valid scheme found; scan for closing '>' with no spaces or '<'
452                        let mut j = s + 1;
453                        let mut found_close = false;
454                        while j < len {
455                            match bytes[j] {
456                                b'>' => {
457                                    found_close = true;
458                                    break;
459                                }
460                                b' ' | b'<' => break,
461                                _ => j += 1,
462                            }
463                        }
464                        if found_close && pos >= i && pos <= j {
465                            return true;
466                        }
467                        if found_close {
468                            i = j + 1;
469                            continue;
470                        }
471                    }
472                }
473            }
474            i += 1;
475        }
476        false
477    }
478
479    // Check if a character is a word boundary (handles Unicode)
480    fn is_word_boundary_char(c: char) -> bool {
481        !c.is_alphanumeric()
482    }
483
484    // Check if position is at a word boundary using byte-level lookups.
485    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
486        if is_start {
487            if pos == 0 {
488                return true;
489            }
490            match content[..pos].chars().next_back() {
491                None => true,
492                Some(c) => Self::is_word_boundary_char(c),
493            }
494        } else {
495            if pos >= content.len() {
496                return true;
497            }
498            match content[pos..].chars().next() {
499                None => true,
500                Some(c) => Self::is_word_boundary_char(c),
501            }
502        }
503    }
504
505    /// For a frontmatter line, return the byte offset where the checkable
506    /// value portion starts. Returns `usize::MAX` if the entire line should be
507    /// skipped (frontmatter delimiters, key-only lines, YAML comments, flow constructs).
508    fn frontmatter_value_offset(line: &str) -> usize {
509        let trimmed = line.trim();
510
511        // Skip frontmatter delimiters and empty lines
512        if trimmed == "---" || trimmed == "+++" || trimmed.is_empty() {
513            return usize::MAX;
514        }
515
516        // Skip YAML comments
517        if trimmed.starts_with('#') {
518            return usize::MAX;
519        }
520
521        // YAML list item: "  - item" or "  - key: value"
522        let stripped = line.trim_start();
523        if let Some(after_dash) = stripped.strip_prefix("- ") {
524            let leading = line.len() - stripped.len();
525            // Check if the list item contains a mapping (e.g., "- key: value")
526            if let Some(result) = Self::kv_value_offset(line, after_dash, leading + 2) {
527                return result;
528            }
529            // Bare list item value (no colon) - check content after "- "
530            return leading + 2;
531        }
532        if stripped == "-" {
533            return usize::MAX;
534        }
535
536        // Key-value pair with colon separator (YAML): "key: value"
537        if let Some(result) = Self::kv_value_offset(line, stripped, line.len() - stripped.len()) {
538            return result;
539        }
540
541        // Key-value pair with equals separator (TOML): "key = value"
542        if let Some(eq_pos) = line.find('=') {
543            let after_eq = eq_pos + 1;
544            if after_eq < line.len() && line.as_bytes()[after_eq] == b' ' {
545                let value_start = after_eq + 1;
546                let value_slice = &line[value_start..];
547                let value_trimmed = value_slice.trim();
548                if value_trimmed.is_empty() {
549                    return usize::MAX;
550                }
551                // For quoted values, skip the opening quote character
552                if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
553                    || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
554                {
555                    let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
556                    return value_start + quote_offset + 1;
557                }
558                return value_start;
559            }
560            // Equals with no space after or at end of line -> no value to check
561            return usize::MAX;
562        }
563
564        // No separator found - continuation line or bare value, check the whole line
565        0
566    }
567
568    /// Parse a key-value pair using colon separator within `content` that starts
569    /// at `base_offset` in the original line. Returns `Some(offset)` if a colon
570    /// separator is found, `None` if no colon is present.
571    fn kv_value_offset(line: &str, content: &str, base_offset: usize) -> Option<usize> {
572        let colon_pos = content.find(':')?;
573        let abs_colon = base_offset + colon_pos;
574        let after_colon = abs_colon + 1;
575        if after_colon < line.len() && line.as_bytes()[after_colon] == b' ' {
576            let value_start = after_colon + 1;
577            let value_slice = &line[value_start..];
578            let value_trimmed = value_slice.trim();
579            if value_trimmed.is_empty() {
580                return Some(usize::MAX);
581            }
582            // Skip flow mappings and flow sequences - too complex for heuristic parsing
583            if value_trimmed.starts_with('{') || value_trimmed.starts_with('[') {
584                return Some(usize::MAX);
585            }
586            // For quoted values, skip the opening quote character
587            if (value_trimmed.starts_with('"') && value_trimmed.ends_with('"'))
588                || (value_trimmed.starts_with('\'') && value_trimmed.ends_with('\''))
589            {
590                let quote_offset = value_slice.find(['"', '\'']).unwrap_or(0);
591                return Some(value_start + quote_offset + 1);
592            }
593            return Some(value_start);
594        }
595        // Colon with no space after or at end of line -> no value to check
596        Some(usize::MAX)
597    }
598
599    // Get the proper name that should be used for a found name
600    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
601        let found_lower = found_name.to_lowercase();
602
603        // Iterate through the configured proper names
604        for name in &self.config.names {
605            let lower_name = name.to_lowercase();
606            let lower_name_no_dots = lower_name.replace('.', "");
607
608            // Direct match
609            if found_lower == lower_name || found_lower == lower_name_no_dots {
610                return Some(name.clone());
611            }
612
613            // Check ASCII-normalized version
614            let ascii_normalized = Self::ascii_normalize(&lower_name);
615
616            let ascii_no_dots = ascii_normalized.replace('.', "");
617
618            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
619                return Some(name.clone());
620            }
621        }
622        None
623    }
624}
625
626impl Rule for MD044ProperNames {
627    fn name(&self) -> &'static str {
628        "MD044"
629    }
630
631    fn description(&self) -> &'static str {
632        "Proper names should have the correct capitalization"
633    }
634
635    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
636        if self.config.names.is_empty() {
637            return true;
638        }
639        // Quick check if any configured name variants exist (case-insensitive)
640        let content_lower = if ctx.content.is_ascii() {
641            ctx.content.to_ascii_lowercase()
642        } else {
643            ctx.content.to_lowercase()
644        };
645        !self.name_variants.iter().any(|name| content_lower.contains(name))
646    }
647
648    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
649        let content = ctx.content;
650        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
651            return Ok(Vec::new());
652        }
653
654        // Compute lowercase content once and reuse across all checks
655        let content_lower = if content.is_ascii() {
656            content.to_ascii_lowercase()
657        } else {
658            content.to_lowercase()
659        };
660
661        // Early return: use pre-computed name_variants for the quick check
662        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
663
664        if !has_potential_matches {
665            return Ok(Vec::new());
666        }
667
668        let line_index = &ctx.line_index;
669        let violations = self.find_name_violations(content, ctx, &content_lower);
670
671        let warnings = violations
672            .into_iter()
673            .filter_map(|(line, column, found_name)| {
674                self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
675                    rule_name: Some(self.name().to_string()),
676                    line,
677                    column,
678                    end_line: line,
679                    end_column: column + found_name.len(),
680                    message: format!("Proper name '{found_name}' should be '{proper_name}'"),
681                    severity: Severity::Warning,
682                    fix: Some(Fix {
683                        range: line_index.line_col_to_byte_range(line, column),
684                        replacement: proper_name,
685                    }),
686                })
687            })
688            .collect();
689
690        Ok(warnings)
691    }
692
693    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
694        let content = ctx.content;
695        if content.is_empty() || self.config.names.is_empty() {
696            return Ok(content.to_string());
697        }
698
699        let content_lower = if content.is_ascii() {
700            content.to_ascii_lowercase()
701        } else {
702            content.to_lowercase()
703        };
704        let violations = self.find_name_violations(content, ctx, &content_lower);
705        if violations.is_empty() {
706            return Ok(content.to_string());
707        }
708
709        // Process lines and build the fixed content
710        let mut fixed_lines = Vec::new();
711
712        // Group violations by line
713        let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
714        for (line_num, col_num, found_name) in violations {
715            violations_by_line
716                .entry(line_num)
717                .or_default()
718                .push((col_num, found_name));
719        }
720
721        // Sort violations within each line in reverse order
722        for violations in violations_by_line.values_mut() {
723            violations.sort_by_key(|b| std::cmp::Reverse(b.0));
724        }
725
726        // Process each line
727        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
728            let line_num = line_idx + 1;
729
730            if let Some(line_violations) = violations_by_line.get(&line_num) {
731                // This line has violations, fix them
732                let mut fixed_line = line_info.content(ctx.content).to_string();
733
734                for (col_num, found_name) in line_violations {
735                    if let Some(proper_name) = self.get_proper_name_for(found_name) {
736                        let start_col = col_num - 1; // Convert to 0-based
737                        let end_col = start_col + found_name.len();
738
739                        if end_col <= fixed_line.len()
740                            && fixed_line.is_char_boundary(start_col)
741                            && fixed_line.is_char_boundary(end_col)
742                        {
743                            fixed_line.replace_range(start_col..end_col, &proper_name);
744                        }
745                    }
746                }
747
748                fixed_lines.push(fixed_line);
749            } else {
750                // No violations on this line, keep it as is
751                fixed_lines.push(line_info.content(ctx.content).to_string());
752            }
753        }
754
755        // Join lines with newlines, preserving the original ending
756        let mut result = fixed_lines.join("\n");
757        if content.ends_with('\n') && !result.ends_with('\n') {
758            result.push('\n');
759        }
760        Ok(result)
761    }
762
763    fn as_any(&self) -> &dyn std::any::Any {
764        self
765    }
766
767    fn default_config_section(&self) -> Option<(String, toml::Value)> {
768        let json_value = serde_json::to_value(&self.config).ok()?;
769        Some((
770            self.name().to_string(),
771            crate::rule_config_serde::json_to_toml_value(&json_value)?,
772        ))
773    }
774
775    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
776    where
777        Self: Sized,
778    {
779        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
780        Box::new(Self::from_config_struct(rule_config))
781    }
782}
783
784#[cfg(test)]
785mod tests {
786    use super::*;
787    use crate::lint_context::LintContext;
788
789    fn create_context(content: &str) -> LintContext<'_> {
790        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
791    }
792
793    #[test]
794    fn test_correctly_capitalized_names() {
795        let rule = MD044ProperNames::new(
796            vec![
797                "JavaScript".to_string(),
798                "TypeScript".to_string(),
799                "Node.js".to_string(),
800            ],
801            true,
802        );
803
804        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
805        let ctx = create_context(content);
806        let result = rule.check(&ctx).unwrap();
807        assert!(result.is_empty(), "Should not flag correctly capitalized names");
808    }
809
810    #[test]
811    fn test_incorrectly_capitalized_names() {
812        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
813
814        let content = "This document uses javascript and typescript incorrectly.";
815        let ctx = create_context(content);
816        let result = rule.check(&ctx).unwrap();
817
818        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
819        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
820        assert_eq!(result[0].line, 1);
821        assert_eq!(result[0].column, 20);
822        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
823        assert_eq!(result[1].line, 1);
824        assert_eq!(result[1].column, 35);
825    }
826
827    #[test]
828    fn test_names_at_beginning_of_sentences() {
829        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
830
831        let content = "javascript is a great language. python is also popular.";
832        let ctx = create_context(content);
833        let result = rule.check(&ctx).unwrap();
834
835        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
836        assert_eq!(result[0].line, 1);
837        assert_eq!(result[0].column, 1);
838        assert_eq!(result[1].line, 1);
839        assert_eq!(result[1].column, 33);
840    }
841
842    #[test]
843    fn test_names_in_code_blocks_checked_by_default() {
844        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
845
846        let content = r#"Here is some text with JavaScript.
847
848```javascript
849// This javascript should be checked
850const lang = "javascript";
851```
852
853But this javascript should be flagged."#;
854
855        let ctx = create_context(content);
856        let result = rule.check(&ctx).unwrap();
857
858        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
859        assert_eq!(result[0].line, 4);
860        assert_eq!(result[1].line, 5);
861        assert_eq!(result[2].line, 8);
862    }
863
864    #[test]
865    fn test_names_in_code_blocks_ignored_when_disabled() {
866        let rule = MD044ProperNames::new(
867            vec!["JavaScript".to_string()],
868            false, // code_blocks = false means skip code blocks
869        );
870
871        let content = r#"```
872javascript in code block
873```"#;
874
875        let ctx = create_context(content);
876        let result = rule.check(&ctx).unwrap();
877
878        assert_eq!(
879            result.len(),
880            0,
881            "Should not flag javascript in code blocks when code_blocks is false"
882        );
883    }
884
885    #[test]
886    fn test_names_in_inline_code_checked_by_default() {
887        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
888
889        let content = "This is `javascript` in inline code and javascript outside.";
890        let ctx = create_context(content);
891        let result = rule.check(&ctx).unwrap();
892
893        // When code_blocks=true, inline code should be checked
894        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
895        assert_eq!(result[0].column, 10); // javascript in inline code
896        assert_eq!(result[1].column, 41); // javascript outside
897    }
898
899    #[test]
900    fn test_multiple_names_in_same_line() {
901        let rule = MD044ProperNames::new(
902            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
903            true,
904        );
905
906        let content = "I use javascript, typescript, and react in my projects.";
907        let ctx = create_context(content);
908        let result = rule.check(&ctx).unwrap();
909
910        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
911        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
912        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
913        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
914    }
915
916    #[test]
917    fn test_case_sensitivity() {
918        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
919
920        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
921        let ctx = create_context(content);
922        let result = rule.check(&ctx).unwrap();
923
924        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
925        // JavaScript (correct) should not be flagged
926        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
927    }
928
929    #[test]
930    fn test_configuration_with_custom_name_list() {
931        let config = MD044Config {
932            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
933            code_blocks: true,
934            html_elements: true,
935            html_comments: true,
936        };
937        let rule = MD044ProperNames::from_config_struct(config);
938
939        let content = "We use github, gitlab, and devops for our workflow.";
940        let ctx = create_context(content);
941        let result = rule.check(&ctx).unwrap();
942
943        assert_eq!(result.len(), 3, "Should flag all custom names");
944        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
945        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
946        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
947    }
948
949    #[test]
950    fn test_empty_configuration() {
951        let rule = MD044ProperNames::new(vec![], true);
952
953        let content = "This has javascript and typescript but no configured names.";
954        let ctx = create_context(content);
955        let result = rule.check(&ctx).unwrap();
956
957        assert!(result.is_empty(), "Should not flag anything with empty configuration");
958    }
959
960    #[test]
961    fn test_names_with_special_characters() {
962        let rule = MD044ProperNames::new(
963            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
964            true,
965        );
966
967        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
968        let ctx = create_context(content);
969        let result = rule.check(&ctx).unwrap();
970
971        // nodejs should match Node.js (dotless variation)
972        // asp.net should be flagged (wrong case)
973        // ASP.NET should not be flagged (correct)
974        // c++ should be flagged
975        assert_eq!(result.len(), 3, "Should handle special characters correctly");
976
977        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
978        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
979        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
980        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
981    }
982
983    #[test]
984    fn test_word_boundaries() {
985        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
986
987        let content = "JavaScript is not java or script, but Java and Script are separate.";
988        let ctx = create_context(content);
989        let result = rule.check(&ctx).unwrap();
990
991        // Should only flag lowercase "java" and "script" as separate words
992        assert_eq!(result.len(), 2, "Should respect word boundaries");
993        assert!(result.iter().any(|w| w.column == 19)); // "java" position
994        assert!(result.iter().any(|w| w.column == 27)); // "script" position
995    }
996
997    #[test]
998    fn test_fix_method() {
999        let rule = MD044ProperNames::new(
1000            vec![
1001                "JavaScript".to_string(),
1002                "TypeScript".to_string(),
1003                "Node.js".to_string(),
1004            ],
1005            true,
1006        );
1007
1008        let content = "I love javascript, typescript, and nodejs!";
1009        let ctx = create_context(content);
1010        let fixed = rule.fix(&ctx).unwrap();
1011
1012        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
1013    }
1014
1015    #[test]
1016    fn test_fix_multiple_occurrences() {
1017        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
1018
1019        let content = "python is great. I use python daily. PYTHON is powerful.";
1020        let ctx = create_context(content);
1021        let fixed = rule.fix(&ctx).unwrap();
1022
1023        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
1024    }
1025
1026    #[test]
1027    fn test_fix_checks_code_blocks_by_default() {
1028        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1029
1030        let content = r#"I love javascript.
1031
1032```
1033const lang = "javascript";
1034```
1035
1036More javascript here."#;
1037
1038        let ctx = create_context(content);
1039        let fixed = rule.fix(&ctx).unwrap();
1040
1041        let expected = r#"I love JavaScript.
1042
1043```
1044const lang = "JavaScript";
1045```
1046
1047More JavaScript here."#;
1048
1049        assert_eq!(fixed, expected);
1050    }
1051
1052    #[test]
1053    fn test_multiline_content() {
1054        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
1055
1056        let content = r#"First line with rust.
1057Second line with python.
1058Third line with RUST and PYTHON."#;
1059
1060        let ctx = create_context(content);
1061        let result = rule.check(&ctx).unwrap();
1062
1063        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
1064        assert_eq!(result[0].line, 1);
1065        assert_eq!(result[1].line, 2);
1066        assert_eq!(result[2].line, 3);
1067        assert_eq!(result[3].line, 3);
1068    }
1069
1070    #[test]
1071    fn test_default_config() {
1072        let config = MD044Config::default();
1073        assert!(config.names.is_empty());
1074        assert!(!config.code_blocks);
1075        assert!(config.html_elements);
1076        assert!(config.html_comments);
1077    }
1078
1079    #[test]
1080    fn test_default_config_checks_html_comments() {
1081        let config = MD044Config {
1082            names: vec!["JavaScript".to_string()],
1083            ..MD044Config::default()
1084        };
1085        let rule = MD044ProperNames::from_config_struct(config);
1086
1087        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1088        let ctx = create_context(content);
1089        let result = rule.check(&ctx).unwrap();
1090
1091        assert_eq!(result.len(), 1, "Default config should check HTML comments");
1092        assert_eq!(result[0].line, 3);
1093    }
1094
1095    #[test]
1096    fn test_default_config_skips_code_blocks() {
1097        let config = MD044Config {
1098            names: vec!["JavaScript".to_string()],
1099            ..MD044Config::default()
1100        };
1101        let rule = MD044ProperNames::from_config_struct(config);
1102
1103        let content = "# Guide\n\n```\njavascript in code\n```\n";
1104        let ctx = create_context(content);
1105        let result = rule.check(&ctx).unwrap();
1106
1107        assert_eq!(result.len(), 0, "Default config should skip code blocks");
1108    }
1109
1110    #[test]
1111    fn test_standalone_html_comment_checked() {
1112        let config = MD044Config {
1113            names: vec!["Test".to_string()],
1114            ..MD044Config::default()
1115        };
1116        let rule = MD044ProperNames::from_config_struct(config);
1117
1118        let content = "# Heading\n\n<!-- this is a test example -->\n";
1119        let ctx = create_context(content);
1120        let result = rule.check(&ctx).unwrap();
1121
1122        assert_eq!(result.len(), 1, "Should flag proper name in standalone HTML comment");
1123        assert_eq!(result[0].line, 3);
1124    }
1125
1126    #[test]
1127    fn test_inline_config_comments_not_flagged() {
1128        let config = MD044Config {
1129            names: vec!["RUMDL".to_string()],
1130            ..MD044Config::default()
1131        };
1132        let rule = MD044ProperNames::from_config_struct(config);
1133
1134        // Lines 1, 3, 4, 6 are inline config comments — should not be flagged.
1135        // Lines 2, 5 contain "rumdl" in regular text — flagged by rule.check(),
1136        // but would be suppressed by the linting engine's inline config filtering.
1137        let content = "<!-- rumdl-disable MD044 -->\nSome rumdl text here.\n<!-- rumdl-enable MD044 -->\n<!-- markdownlint-disable -->\nMore rumdl text.\n<!-- markdownlint-enable -->\n";
1138        let ctx = create_context(content);
1139        let result = rule.check(&ctx).unwrap();
1140
1141        assert_eq!(result.len(), 2, "Should only flag body lines, not config comments");
1142        assert_eq!(result[0].line, 2);
1143        assert_eq!(result[1].line, 5);
1144    }
1145
1146    #[test]
1147    fn test_html_comment_skipped_when_disabled() {
1148        let config = MD044Config {
1149            names: vec!["Test".to_string()],
1150            code_blocks: true,
1151            html_elements: true,
1152            html_comments: false,
1153        };
1154        let rule = MD044ProperNames::from_config_struct(config);
1155
1156        let content = "# Heading\n\n<!-- this is a test example -->\n\nRegular test here.\n";
1157        let ctx = create_context(content);
1158        let result = rule.check(&ctx).unwrap();
1159
1160        assert_eq!(
1161            result.len(),
1162            1,
1163            "Should only flag 'test' outside HTML comment when html_comments=false"
1164        );
1165        assert_eq!(result[0].line, 5);
1166    }
1167
1168    #[test]
1169    fn test_fix_corrects_html_comment_content() {
1170        let config = MD044Config {
1171            names: vec!["JavaScript".to_string()],
1172            ..MD044Config::default()
1173        };
1174        let rule = MD044ProperNames::from_config_struct(config);
1175
1176        let content = "# Guide\n\n<!-- javascript mentioned here -->\n";
1177        let ctx = create_context(content);
1178        let fixed = rule.fix(&ctx).unwrap();
1179
1180        assert_eq!(fixed, "# Guide\n\n<!-- JavaScript mentioned here -->\n");
1181    }
1182
1183    #[test]
1184    fn test_fix_does_not_modify_inline_config_comments() {
1185        let config = MD044Config {
1186            names: vec!["RUMDL".to_string()],
1187            ..MD044Config::default()
1188        };
1189        let rule = MD044ProperNames::from_config_struct(config);
1190
1191        let content = "<!-- rumdl-disable -->\nSome rumdl text.\n<!-- rumdl-enable -->\n";
1192        let ctx = create_context(content);
1193        let fixed = rule.fix(&ctx).unwrap();
1194
1195        // Config comments should be untouched; body text should be fixed
1196        assert!(fixed.contains("<!-- rumdl-disable -->"));
1197        assert!(fixed.contains("<!-- rumdl-enable -->"));
1198        assert!(fixed.contains("Some RUMDL text."));
1199    }
1200
1201    #[test]
1202    fn test_performance_with_many_names() {
1203        let mut names = vec![];
1204        for i in 0..50 {
1205            names.push(format!("ProperName{i}"));
1206        }
1207
1208        let rule = MD044ProperNames::new(names, true);
1209
1210        let content = "This has propername0, propername25, and propername49 incorrectly.";
1211        let ctx = create_context(content);
1212        let result = rule.check(&ctx).unwrap();
1213
1214        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
1215    }
1216
1217    #[test]
1218    fn test_large_name_count_performance() {
1219        // Verify MD044 can handle large numbers of names without regex limitations
1220        // This test confirms that fancy-regex handles large patterns well
1221        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
1222
1223        let rule = MD044ProperNames::new(names, true);
1224
1225        // The combined pattern should be created successfully
1226        assert!(rule.combined_pattern.is_some());
1227
1228        // Should be able to check content without errors
1229        let content = "This has propername0 and propername999 in it.";
1230        let ctx = create_context(content);
1231        let result = rule.check(&ctx).unwrap();
1232
1233        // Should detect both incorrect names
1234        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
1235    }
1236
1237    #[test]
1238    fn test_cache_behavior() {
1239        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1240
1241        let content = "Using javascript here.";
1242        let ctx = create_context(content);
1243
1244        // First check
1245        let result1 = rule.check(&ctx).unwrap();
1246        assert_eq!(result1.len(), 1);
1247
1248        // Second check should use cache
1249        let result2 = rule.check(&ctx).unwrap();
1250        assert_eq!(result2.len(), 1);
1251
1252        // Results should be identical
1253        assert_eq!(result1[0].line, result2[0].line);
1254        assert_eq!(result1[0].column, result2[0].column);
1255    }
1256
1257    #[test]
1258    fn test_html_comments_not_checked_when_disabled() {
1259        let config = MD044Config {
1260            names: vec!["JavaScript".to_string()],
1261            code_blocks: true,    // Check code blocks
1262            html_elements: true,  // Check HTML elements
1263            html_comments: false, // Don't check HTML comments
1264        };
1265        let rule = MD044ProperNames::from_config_struct(config);
1266
1267        let content = r#"Regular javascript here.
1268<!-- This javascript in HTML comment should be ignored -->
1269More javascript outside."#;
1270
1271        let ctx = create_context(content);
1272        let result = rule.check(&ctx).unwrap();
1273
1274        assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
1275        assert_eq!(result[0].line, 1);
1276        assert_eq!(result[1].line, 3);
1277    }
1278
1279    #[test]
1280    fn test_html_comments_checked_when_enabled() {
1281        let config = MD044Config {
1282            names: vec!["JavaScript".to_string()],
1283            code_blocks: true,   // Check code blocks
1284            html_elements: true, // Check HTML elements
1285            html_comments: true, // Check HTML comments
1286        };
1287        let rule = MD044ProperNames::from_config_struct(config);
1288
1289        let content = r#"Regular javascript here.
1290<!-- This javascript in HTML comment should be checked -->
1291More javascript outside."#;
1292
1293        let ctx = create_context(content);
1294        let result = rule.check(&ctx).unwrap();
1295
1296        assert_eq!(
1297            result.len(),
1298            3,
1299            "Should flag all javascript occurrences including in HTML comments"
1300        );
1301    }
1302
1303    #[test]
1304    fn test_multiline_html_comments() {
1305        let config = MD044Config {
1306            names: vec!["Python".to_string(), "JavaScript".to_string()],
1307            code_blocks: true,    // Check code blocks
1308            html_elements: true,  // Check HTML elements
1309            html_comments: false, // Don't check HTML comments
1310        };
1311        let rule = MD044ProperNames::from_config_struct(config);
1312
1313        let content = r#"Regular python here.
1314<!--
1315This is a multiline comment
1316with javascript and python
1317that should be ignored
1318-->
1319More javascript outside."#;
1320
1321        let ctx = create_context(content);
1322        let result = rule.check(&ctx).unwrap();
1323
1324        assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1325        assert_eq!(result[0].line, 1); // python
1326        assert_eq!(result[1].line, 7); // javascript
1327    }
1328
1329    #[test]
1330    fn test_fix_preserves_html_comments_when_disabled() {
1331        let config = MD044Config {
1332            names: vec!["JavaScript".to_string()],
1333            code_blocks: true,    // Check code blocks
1334            html_elements: true,  // Check HTML elements
1335            html_comments: false, // Don't check HTML comments
1336        };
1337        let rule = MD044ProperNames::from_config_struct(config);
1338
1339        let content = r#"javascript here.
1340<!-- javascript in comment -->
1341More javascript."#;
1342
1343        let ctx = create_context(content);
1344        let fixed = rule.fix(&ctx).unwrap();
1345
1346        let expected = r#"JavaScript here.
1347<!-- javascript in comment -->
1348More JavaScript."#;
1349
1350        assert_eq!(
1351            fixed, expected,
1352            "Should not fix names inside HTML comments when disabled"
1353        );
1354    }
1355
1356    #[test]
1357    fn test_proper_names_in_link_text_are_flagged() {
1358        let rule = MD044ProperNames::new(
1359            vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1360            true,
1361        );
1362
1363        let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1364
1365Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1366
1367Real javascript should be flagged.
1368
1369Also see the [typescript guide][ts-ref] for more.
1370
1371Real python should be flagged too.
1372
1373[ts-ref]: https://typescript.org/handbook"#;
1374
1375        let ctx = create_context(content);
1376        let result = rule.check(&ctx).unwrap();
1377
1378        // Link text should be checked, URLs should not be checked
1379        // Line 1: [javascript documentation] - "javascript" should be flagged
1380        // Line 3: [node.js homepage] - "node.js" should be flagged (matches "Node.js")
1381        // Line 3: [python tutorial] - "python" should be flagged
1382        // Line 5: standalone javascript
1383        // Line 9: standalone python
1384        assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1385
1386        // Verify line numbers for link text warnings
1387        let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1388        assert_eq!(line_1_warnings.len(), 1);
1389        assert!(
1390            line_1_warnings[0]
1391                .message
1392                .contains("'javascript' should be 'JavaScript'")
1393        );
1394
1395        let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1396        assert_eq!(line_3_warnings.len(), 2); // node.js and python
1397
1398        // Standalone warnings
1399        assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1400        assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1401    }
1402
1403    #[test]
1404    fn test_link_urls_not_flagged() {
1405        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1406
1407        // URL contains "javascript" but should NOT be flagged
1408        let content = r#"[Link Text](https://javascript.info/guide)"#;
1409
1410        let ctx = create_context(content);
1411        let result = rule.check(&ctx).unwrap();
1412
1413        // URL should not be checked
1414        assert!(result.is_empty(), "URLs should not be checked for proper names");
1415    }
1416
1417    #[test]
1418    fn test_proper_names_in_image_alt_text_are_flagged() {
1419        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1420
1421        let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.
1422
1423Real javascript should be flagged."#;
1424
1425        let ctx = create_context(content);
1426        let result = rule.check(&ctx).unwrap();
1427
1428        // Image alt text should be checked, URL and title should not be checked
1429        // Line 1: ![javascript logo] - "javascript" should be flagged
1430        // Line 3: standalone javascript
1431        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1432        assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1433        assert!(result[0].line == 1); // "![javascript logo]"
1434        assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1435        assert!(result[1].line == 3); // "Real javascript should be flagged."
1436    }
1437
1438    #[test]
1439    fn test_image_urls_not_flagged() {
1440        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1441
1442        // URL contains "javascript" but should NOT be flagged
1443        let content = r#"![Logo](https://javascript.info/logo.png)"#;
1444
1445        let ctx = create_context(content);
1446        let result = rule.check(&ctx).unwrap();
1447
1448        // Image URL should not be checked
1449        assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1450    }
1451
1452    #[test]
1453    fn test_reference_link_text_flagged_but_definition_not() {
1454        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1455
1456        let content = r#"Check the [javascript guide][js-ref] for details.
1457
1458Real javascript should be flagged.
1459
1460[js-ref]: https://javascript.info/typescript/guide"#;
1461
1462        let ctx = create_context(content);
1463        let result = rule.check(&ctx).unwrap();
1464
1465        // Link text should be checked, reference definitions should not
1466        // Line 1: [javascript guide] - should be flagged
1467        // Line 3: standalone javascript - should be flagged
1468        // Line 5: reference definition - should NOT be flagged
1469        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1470        assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1471        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1472    }
1473
1474    #[test]
1475    fn test_reference_definitions_not_flagged() {
1476        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1477
1478        // Reference definition should NOT be flagged
1479        let content = r#"[js-ref]: https://javascript.info/guide"#;
1480
1481        let ctx = create_context(content);
1482        let result = rule.check(&ctx).unwrap();
1483
1484        // Reference definition URLs should not be checked
1485        assert!(result.is_empty(), "Reference definitions should not be checked");
1486    }
1487
1488    #[test]
1489    fn test_wikilinks_text_is_flagged() {
1490        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1491
1492        // WikiLinks [[destination]] should have their text checked
1493        let content = r#"[[javascript]]
1494
1495Regular javascript here.
1496
1497[[JavaScript|display text]]"#;
1498
1499        let ctx = create_context(content);
1500        let result = rule.check(&ctx).unwrap();
1501
1502        // Line 1: [[javascript]] - should be flagged (WikiLink text)
1503        // Line 3: standalone javascript - should be flagged
1504        // Line 5: [[JavaScript|display text]] - correct capitalization, no flag
1505        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1506        assert!(
1507            result
1508                .iter()
1509                .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1510        );
1511        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1512    }
1513
1514    #[test]
1515    fn test_url_link_text_not_flagged() {
1516        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1517
1518        // Link text that is itself a URL should not be flagged
1519        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1520
1521[http://github.com/org/repo](http://github.com/org/repo)
1522
1523[www.github.com/org/repo](https://www.github.com/org/repo)"#;
1524
1525        let ctx = create_context(content);
1526        let result = rule.check(&ctx).unwrap();
1527
1528        assert!(
1529            result.is_empty(),
1530            "URL-like link text should not be flagged, got: {result:?}"
1531        );
1532    }
1533
1534    #[test]
1535    fn test_url_link_text_with_leading_space_not_flagged() {
1536        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1537
1538        // Leading/trailing whitespace in link text should be trimmed before URL check
1539        let content = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
1540
1541        let ctx = create_context(content);
1542        let result = rule.check(&ctx).unwrap();
1543
1544        assert!(
1545            result.is_empty(),
1546            "URL-like link text with leading space should not be flagged, got: {result:?}"
1547        );
1548    }
1549
1550    #[test]
1551    fn test_url_link_text_uppercase_scheme_not_flagged() {
1552        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1553
1554        let content = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
1555
1556        let ctx = create_context(content);
1557        let result = rule.check(&ctx).unwrap();
1558
1559        assert!(
1560            result.is_empty(),
1561            "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
1562        );
1563    }
1564
1565    #[test]
1566    fn test_non_url_link_text_still_flagged() {
1567        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1568
1569        // Link text that is NOT a URL should still be flagged
1570        let content = r#"[github.com/org/repo](https://github.com/org/repo)
1571
1572[Visit github](https://github.com/org/repo)
1573
1574[//github.com/org/repo](//github.com/org/repo)
1575
1576[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
1577
1578        let ctx = create_context(content);
1579        let result = rule.check(&ctx).unwrap();
1580
1581        assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
1582        assert!(result.iter().any(|w| w.line == 1)); // github.com (no protocol)
1583        assert!(result.iter().any(|w| w.line == 3)); // Visit github
1584        assert!(result.iter().any(|w| w.line == 5)); // //github.com (protocol-relative)
1585        assert!(result.iter().any(|w| w.line == 7)); // ftp://github.com
1586    }
1587
1588    #[test]
1589    fn test_url_link_text_fix_not_applied() {
1590        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1591
1592        let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";
1593
1594        let ctx = create_context(content);
1595        let result = rule.fix(&ctx).unwrap();
1596
1597        assert_eq!(result, content, "Fix should not modify URL-like link text");
1598    }
1599
1600    #[test]
1601    fn test_mixed_url_and_regular_link_text() {
1602        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1603
1604        // Mix of URL link text (should skip) and regular text (should flag)
1605        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1606
1607Visit [github documentation](https://github.com/docs) for details.
1608
1609[www.github.com/pricing](https://www.github.com/pricing)"#;
1610
1611        let ctx = create_context(content);
1612        let result = rule.check(&ctx).unwrap();
1613
1614        // Only line 3 should be flagged ("github documentation" is not a URL)
1615        assert_eq!(
1616            result.len(),
1617            1,
1618            "Only non-URL link text should be flagged, got: {result:?}"
1619        );
1620        assert_eq!(result[0].line, 3);
1621    }
1622
1623    #[test]
1624    fn test_html_attribute_values_not_flagged() {
1625        // Matches inside HTML tag attributes (between `<` and `>`) are not flagged.
1626        // Attribute values are not prose — they hold URLs, class names, data values, etc.
1627        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1628        let content = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
1629        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1630        let result = rule.check(&ctx).unwrap();
1631
1632        // Nothing on line 5 should be flagged — everything is inside the `<img ...>` tag
1633        let line5_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
1634        assert!(
1635            line5_violations.is_empty(),
1636            "Should not flag anything inside HTML tag attributes: {line5_violations:?}"
1637        );
1638
1639        // Plain text on line 3 is still flagged
1640        let line3_violations: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1641        assert_eq!(line3_violations.len(), 1, "Plain 'test' on line 3 should be flagged");
1642    }
1643
1644    #[test]
1645    fn test_html_text_content_still_flagged() {
1646        // Text between HTML tags (not inside `<...>`) is still checked.
1647        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1648        let content = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
1649        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1650        let result = rule.check(&ctx).unwrap();
1651
1652        // "example.test" in the href attribute → not flagged (inside `<...>`)
1653        // "test link" in the anchor text → flagged (between `>` and `<`)
1654        assert_eq!(
1655            result.len(),
1656            1,
1657            "Should flag only 'test' in anchor text, not in href: {result:?}"
1658        );
1659        assert_eq!(result[0].column, 37, "Should flag col 37 ('test link' in anchor text)");
1660    }
1661
1662    #[test]
1663    fn test_html_attribute_various_not_flagged() {
1664        // All attribute types are ignored: src, href, alt, class, data-*, title, etc.
1665        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1666        let content = concat!(
1667            "# Heading\n\n",
1668            "<img src=\"test.png\" alt=\"test image\">\n",
1669            "<span class=\"test-class\" data-test=\"value\">test content</span>\n",
1670        );
1671        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1672        let result = rule.check(&ctx).unwrap();
1673
1674        // Only "test content" (between tags on line 4) should be flagged
1675        assert_eq!(
1676            result.len(),
1677            1,
1678            "Should flag only 'test content' between tags: {result:?}"
1679        );
1680        assert_eq!(result[0].line, 4);
1681    }
1682
1683    #[test]
1684    fn test_plain_text_underscore_boundary_unchanged() {
1685        // Plain text (outside HTML tags) still uses original word boundary semantics where
1686        // underscore is a boundary character, matching markdownlint's behavior via AST splitting.
1687        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1688        let content = "# Heading\n\ntest_image is here and just_test ends here\n";
1689        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1690        let result = rule.check(&ctx).unwrap();
1691
1692        // Both "test_image" (test at start) and "just_test" (test at end) are flagged
1693        // because in plain text, "_" is a word boundary
1694        assert_eq!(
1695            result.len(),
1696            2,
1697            "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
1698        );
1699        let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
1700        assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
1701        assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
1702    }
1703
1704    #[test]
1705    fn test_frontmatter_yaml_keys_not_flagged() {
1706        // YAML keys in frontmatter should NOT be checked for proper name violations.
1707        // Only values should be checked.
1708        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1709
1710        let content = "---\ntitle: Heading\ntest: Some Test value\n---\n\nTest\n";
1711        let ctx = create_context(content);
1712        let result = rule.check(&ctx).unwrap();
1713
1714        // "test" in the YAML key (line 3) should NOT be flagged
1715        // "Test" in the YAML value (line 3) is correct capitalization, no flag
1716        // "Test" in body (line 6) is correct capitalization, no flag
1717        assert!(
1718            result.is_empty(),
1719            "Should not flag YAML keys or correctly capitalized values: {result:?}"
1720        );
1721    }
1722
1723    #[test]
1724    fn test_frontmatter_yaml_values_flagged() {
1725        // Incorrectly capitalized names in YAML values should be flagged.
1726        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1727
1728        let content = "---\ntitle: Heading\nkey: a test value\n---\n\nTest\n";
1729        let ctx = create_context(content);
1730        let result = rule.check(&ctx).unwrap();
1731
1732        // "test" in the YAML value (line 3) SHOULD be flagged
1733        assert_eq!(result.len(), 1, "Should flag 'test' in YAML value: {result:?}");
1734        assert_eq!(result[0].line, 3);
1735        assert_eq!(result[0].column, 8); // "key: a " = 7 chars, then "test" at column 8
1736    }
1737
1738    #[test]
1739    fn test_frontmatter_key_matches_name_not_flagged() {
1740        // A YAML key that happens to match a configured name should NOT be flagged.
1741        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1742
1743        let content = "---\ntest: other value\n---\n\nBody text\n";
1744        let ctx = create_context(content);
1745        let result = rule.check(&ctx).unwrap();
1746
1747        assert!(
1748            result.is_empty(),
1749            "Should not flag YAML key that matches configured name: {result:?}"
1750        );
1751    }
1752
1753    #[test]
1754    fn test_frontmatter_empty_value_not_flagged() {
1755        // YAML key with no value should be skipped entirely.
1756        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1757
1758        let content = "---\ntest:\ntest: \n---\n\nBody text\n";
1759        let ctx = create_context(content);
1760        let result = rule.check(&ctx).unwrap();
1761
1762        assert!(
1763            result.is_empty(),
1764            "Should not flag YAML keys with empty values: {result:?}"
1765        );
1766    }
1767
1768    #[test]
1769    fn test_frontmatter_nested_yaml_key_not_flagged() {
1770        // Nested/indented YAML keys should also be skipped.
1771        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1772
1773        let content = "---\nparent:\n  test: nested value\n---\n\nBody text\n";
1774        let ctx = create_context(content);
1775        let result = rule.check(&ctx).unwrap();
1776
1777        // "test" as a nested key should NOT be flagged
1778        assert!(result.is_empty(), "Should not flag nested YAML keys: {result:?}");
1779    }
1780
1781    #[test]
1782    fn test_frontmatter_list_items_checked() {
1783        // YAML list items are values and should be checked for proper names.
1784        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1785
1786        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
1787        let ctx = create_context(content);
1788        let result = rule.check(&ctx).unwrap();
1789
1790        // "test" as a list item value SHOULD be flagged
1791        assert_eq!(result.len(), 1, "Should flag 'test' in YAML list item: {result:?}");
1792        assert_eq!(result[0].line, 3);
1793    }
1794
1795    #[test]
1796    fn test_frontmatter_value_with_multiple_colons() {
1797        // For "key: value: more", key is before first colon.
1798        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1799
1800        let content = "---\ntest: description: a test thing\n---\n\nBody text\n";
1801        let ctx = create_context(content);
1802        let result = rule.check(&ctx).unwrap();
1803
1804        // "test" as key should NOT be flagged
1805        // "test" in value portion ("description: a test thing") SHOULD be flagged
1806        assert_eq!(
1807            result.len(),
1808            1,
1809            "Should flag 'test' in value after first colon: {result:?}"
1810        );
1811        assert_eq!(result[0].line, 2);
1812        assert!(result[0].column > 6, "Violation column should be in value portion");
1813    }
1814
1815    #[test]
1816    fn test_frontmatter_does_not_affect_body() {
1817        // Body text after frontmatter should still be fully checked.
1818        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1819
1820        let content = "---\ntitle: Heading\n---\n\ntest should be flagged here\n";
1821        let ctx = create_context(content);
1822        let result = rule.check(&ctx).unwrap();
1823
1824        assert_eq!(result.len(), 1, "Should flag 'test' in body text: {result:?}");
1825        assert_eq!(result[0].line, 5);
1826    }
1827
1828    #[test]
1829    fn test_frontmatter_fix_corrects_values_preserves_keys() {
1830        // Fix should correct YAML values but preserve keys.
1831        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1832
1833        let content = "---\ntest: a test value\n---\n\ntest here\n";
1834        let ctx = create_context(content);
1835        let fixed = rule.fix(&ctx).unwrap();
1836
1837        // Key "test" should remain lowercase; value "test" should become "Test"
1838        assert_eq!(fixed, "---\ntest: a Test value\n---\n\nTest here\n");
1839    }
1840
1841    #[test]
1842    fn test_frontmatter_multiword_value_flagged() {
1843        // Multiple proper names in a single YAML value should all be flagged.
1844        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1845
1846        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
1847        let ctx = create_context(content);
1848        let result = rule.check(&ctx).unwrap();
1849
1850        assert_eq!(result.len(), 2, "Should flag both names in YAML value: {result:?}");
1851        assert!(result.iter().all(|w| w.line == 2));
1852    }
1853
1854    #[test]
1855    fn test_frontmatter_yaml_comments_not_checked() {
1856        // YAML comments inside frontmatter should be skipped entirely.
1857        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1858
1859        let content = "---\n# test comment\ntitle: Heading\n---\n\nBody text\n";
1860        let ctx = create_context(content);
1861        let result = rule.check(&ctx).unwrap();
1862
1863        assert!(result.is_empty(), "Should not flag names in YAML comments: {result:?}");
1864    }
1865
1866    #[test]
1867    fn test_frontmatter_delimiters_not_checked() {
1868        // Frontmatter delimiter lines (--- or +++) should never be checked.
1869        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1870
1871        let content = "---\ntitle: Heading\n---\n\ntest here\n";
1872        let ctx = create_context(content);
1873        let result = rule.check(&ctx).unwrap();
1874
1875        // Only the body "test" on line 5 should be flagged
1876        assert_eq!(result.len(), 1, "Should only flag body text: {result:?}");
1877        assert_eq!(result[0].line, 5);
1878    }
1879
1880    #[test]
1881    fn test_frontmatter_continuation_lines_checked() {
1882        // Continuation lines (indented, no colon) are value content and should be checked.
1883        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1884
1885        let content = "---\ndescription: >\n  a test value\n  continued here\n---\n\nBody\n";
1886        let ctx = create_context(content);
1887        let result = rule.check(&ctx).unwrap();
1888
1889        // "test" on the continuation line should be flagged
1890        assert_eq!(result.len(), 1, "Should flag 'test' in continuation line: {result:?}");
1891        assert_eq!(result[0].line, 3);
1892    }
1893
1894    #[test]
1895    fn test_frontmatter_quoted_values_checked() {
1896        // Quoted YAML values should have their content checked (inside the quotes).
1897        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1898
1899        let content = "---\ntitle: \"a test title\"\n---\n\nBody\n";
1900        let ctx = create_context(content);
1901        let result = rule.check(&ctx).unwrap();
1902
1903        assert_eq!(result.len(), 1, "Should flag 'test' in quoted YAML value: {result:?}");
1904        assert_eq!(result[0].line, 2);
1905    }
1906
1907    #[test]
1908    fn test_frontmatter_single_quoted_values_checked() {
1909        // Single-quoted YAML values should have their content checked.
1910        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1911
1912        let content = "---\ntitle: 'a test title'\n---\n\nBody\n";
1913        let ctx = create_context(content);
1914        let result = rule.check(&ctx).unwrap();
1915
1916        assert_eq!(
1917            result.len(),
1918            1,
1919            "Should flag 'test' in single-quoted YAML value: {result:?}"
1920        );
1921        assert_eq!(result[0].line, 2);
1922    }
1923
1924    #[test]
1925    fn test_frontmatter_fix_multiword_values() {
1926        // Fix should correct all proper names in frontmatter values.
1927        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1928
1929        let content = "---\ndescription: Learn javascript and typescript\n---\n\nBody\n";
1930        let ctx = create_context(content);
1931        let fixed = rule.fix(&ctx).unwrap();
1932
1933        assert_eq!(
1934            fixed,
1935            "---\ndescription: Learn JavaScript and TypeScript\n---\n\nBody\n"
1936        );
1937    }
1938
1939    #[test]
1940    fn test_frontmatter_fix_preserves_yaml_structure() {
1941        // Fix should preserve YAML structure while correcting values.
1942        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1943
1944        let content = "---\ntags:\n  - test\n  - other\ntitle: a test doc\n---\n\ntest body\n";
1945        let ctx = create_context(content);
1946        let fixed = rule.fix(&ctx).unwrap();
1947
1948        assert_eq!(
1949            fixed,
1950            "---\ntags:\n  - Test\n  - other\ntitle: a Test doc\n---\n\nTest body\n"
1951        );
1952    }
1953
1954    #[test]
1955    fn test_frontmatter_toml_delimiters_not_checked() {
1956        // TOML frontmatter with +++ delimiters should also be handled.
1957        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1958
1959        let content = "+++\ntitle = \"a test title\"\n+++\n\ntest body\n";
1960        let ctx = create_context(content);
1961        let result = rule.check(&ctx).unwrap();
1962
1963        // "title" as TOML key should NOT be flagged
1964        // "test" in TOML quoted value SHOULD be flagged (line 2)
1965        // "test" in body SHOULD be flagged (line 5)
1966        assert_eq!(result.len(), 2, "Should flag TOML value and body: {result:?}");
1967        let fm_violations: Vec<_> = result.iter().filter(|w| w.line == 2).collect();
1968        assert_eq!(fm_violations.len(), 1, "Should flag 'test' in TOML value: {result:?}");
1969        let body_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
1970        assert_eq!(body_violations.len(), 1, "Should flag body 'test': {result:?}");
1971    }
1972
1973    #[test]
1974    fn test_frontmatter_toml_key_not_flagged() {
1975        // TOML keys should NOT be flagged, only values.
1976        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1977
1978        let content = "+++\ntest = \"other value\"\n+++\n\nBody text\n";
1979        let ctx = create_context(content);
1980        let result = rule.check(&ctx).unwrap();
1981
1982        assert!(
1983            result.is_empty(),
1984            "Should not flag TOML key that matches configured name: {result:?}"
1985        );
1986    }
1987
1988    #[test]
1989    fn test_frontmatter_toml_fix_preserves_keys() {
1990        // Fix should correct TOML values but preserve keys.
1991        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1992
1993        let content = "+++\ntest = \"a test value\"\n+++\n\ntest here\n";
1994        let ctx = create_context(content);
1995        let fixed = rule.fix(&ctx).unwrap();
1996
1997        // Key "test" should remain lowercase; value "test" should become "Test"
1998        assert_eq!(fixed, "+++\ntest = \"a Test value\"\n+++\n\nTest here\n");
1999    }
2000
2001    #[test]
2002    fn test_frontmatter_list_item_mapping_key_not_flagged() {
2003        // In "- test: nested value", "test" is a YAML key within a list-item mapping.
2004        // The key should NOT be flagged; only the value should be checked.
2005        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2006
2007        let content = "---\nitems:\n  - test: nested value\n---\n\nBody text\n";
2008        let ctx = create_context(content);
2009        let result = rule.check(&ctx).unwrap();
2010
2011        assert!(
2012            result.is_empty(),
2013            "Should not flag YAML key in list-item mapping: {result:?}"
2014        );
2015    }
2016
2017    #[test]
2018    fn test_frontmatter_list_item_mapping_value_flagged() {
2019        // In "- key: test value", the value portion should be checked.
2020        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2021
2022        let content = "---\nitems:\n  - key: a test value\n---\n\nBody text\n";
2023        let ctx = create_context(content);
2024        let result = rule.check(&ctx).unwrap();
2025
2026        assert_eq!(
2027            result.len(),
2028            1,
2029            "Should flag 'test' in list-item mapping value: {result:?}"
2030        );
2031        assert_eq!(result[0].line, 3);
2032    }
2033
2034    #[test]
2035    fn test_frontmatter_bare_list_item_still_flagged() {
2036        // Bare list items without a colon (e.g., "- test") are values and should be flagged.
2037        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2038
2039        let content = "---\ntags:\n  - test\n  - other\n---\n\nBody text\n";
2040        let ctx = create_context(content);
2041        let result = rule.check(&ctx).unwrap();
2042
2043        assert_eq!(result.len(), 1, "Should flag 'test' in bare list item: {result:?}");
2044        assert_eq!(result[0].line, 3);
2045    }
2046
2047    #[test]
2048    fn test_frontmatter_flow_mapping_not_flagged() {
2049        // Flow mappings like {test: value} contain YAML keys that should not be flagged.
2050        // The entire flow construct should be skipped.
2051        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2052
2053        let content = "---\nflow_map: {test: value, other: test}\n---\n\nBody text\n";
2054        let ctx = create_context(content);
2055        let result = rule.check(&ctx).unwrap();
2056
2057        assert!(
2058            result.is_empty(),
2059            "Should not flag names inside flow mappings: {result:?}"
2060        );
2061    }
2062
2063    #[test]
2064    fn test_frontmatter_flow_sequence_not_flagged() {
2065        // Flow sequences like [test, other] should also be skipped.
2066        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2067
2068        let content = "---\nitems: [test, other, test]\n---\n\nBody text\n";
2069        let ctx = create_context(content);
2070        let result = rule.check(&ctx).unwrap();
2071
2072        assert!(
2073            result.is_empty(),
2074            "Should not flag names inside flow sequences: {result:?}"
2075        );
2076    }
2077
2078    #[test]
2079    fn test_frontmatter_list_item_mapping_fix_preserves_key() {
2080        // Fix should correct values in list-item mappings but preserve keys.
2081        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2082
2083        let content = "---\nitems:\n  - test: a test value\n---\n\ntest here\n";
2084        let ctx = create_context(content);
2085        let fixed = rule.fix(&ctx).unwrap();
2086
2087        // "test" as list-item key should remain lowercase;
2088        // "test" in value portion should become "Test"
2089        assert_eq!(fixed, "---\nitems:\n  - test: a Test value\n---\n\nTest here\n");
2090    }
2091
2092    // --- Angle-bracket URL tests (issue #457) ---
2093
2094    #[test]
2095    fn test_angle_bracket_url_in_html_comment_not_flagged() {
2096        // Angle-bracket URLs inside HTML comments should be skipped
2097        let config = MD044Config {
2098            names: vec!["Test".to_string()],
2099            ..MD044Config::default()
2100        };
2101        let rule = MD044ProperNames::from_config_struct(config);
2102
2103        let content = "---\ntitle: Level 1 heading\n---\n\n<https://www.example.test>\n\n<!-- This is a Test https://www.example.test -->\n<!-- This is a Test <https://www.example.test> -->\n";
2104        let ctx = create_context(content);
2105        let result = rule.check(&ctx).unwrap();
2106
2107        // Line 7: "Test" in comment prose before bare URL -- already correct capitalization
2108        // Line 7: "test" in bare URL (not in angle brackets) -- but "test" is in URL domain, not prose.
2109        //   However, .example.test has "test" at a word boundary (after '.'), so it IS flagged.
2110        // Line 8: "Test" in comment prose -- correct capitalization, not flagged
2111        // Line 8: "test" in <https://www.example.test> -- inside angle-bracket URL, NOT flagged
2112
2113        // The key assertion: line 8's angle-bracket URL should NOT produce a warning
2114        let line8_warnings: Vec<_> = result.iter().filter(|w| w.line == 8).collect();
2115        assert!(
2116            line8_warnings.is_empty(),
2117            "Should not flag names inside angle-bracket URLs in HTML comments: {line8_warnings:?}"
2118        );
2119    }
2120
2121    #[test]
2122    fn test_bare_url_in_html_comment_still_flagged() {
2123        // Bare URLs (not in angle brackets) inside HTML comments should still be checked
2124        let config = MD044Config {
2125            names: vec!["Test".to_string()],
2126            ..MD044Config::default()
2127        };
2128        let rule = MD044ProperNames::from_config_struct(config);
2129
2130        let content = "<!-- This is a test https://www.example.test -->\n";
2131        let ctx = create_context(content);
2132        let result = rule.check(&ctx).unwrap();
2133
2134        // "test" appears as prose text before URL and also in the bare URL domain
2135        // At minimum, the prose "test" should be flagged
2136        assert!(
2137            !result.is_empty(),
2138            "Should flag 'test' in prose text of HTML comment with bare URL"
2139        );
2140    }
2141
2142    #[test]
2143    fn test_angle_bracket_url_in_regular_markdown_not_flagged() {
2144        // Angle-bracket URLs in regular markdown are already handled by the link parser,
2145        // but the angle-bracket check provides a safety net
2146        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
2147
2148        let content = "<https://www.example.test>\n";
2149        let ctx = create_context(content);
2150        let result = rule.check(&ctx).unwrap();
2151
2152        assert!(
2153            result.is_empty(),
2154            "Should not flag names inside angle-bracket URLs in regular markdown: {result:?}"
2155        );
2156    }
2157
2158    #[test]
2159    fn test_multiple_angle_bracket_urls_in_one_comment() {
2160        let config = MD044Config {
2161            names: vec!["Test".to_string()],
2162            ..MD044Config::default()
2163        };
2164        let rule = MD044ProperNames::from_config_struct(config);
2165
2166        let content = "<!-- See <https://test.example.com> and <https://www.example.test> for details -->\n";
2167        let ctx = create_context(content);
2168        let result = rule.check(&ctx).unwrap();
2169
2170        // Both URLs are inside angle brackets, so "test" inside them should NOT be flagged
2171        assert!(
2172            result.is_empty(),
2173            "Should not flag names inside multiple angle-bracket URLs: {result:?}"
2174        );
2175    }
2176
2177    #[test]
2178    fn test_angle_bracket_non_url_still_flagged() {
2179        // <Test> is NOT a URL (no scheme), so is_in_angle_bracket_url does NOT protect it.
2180        // Whether it gets flagged depends on HTML tag detection, not on our URL check.
2181        assert!(
2182            !MD044ProperNames::is_in_angle_bracket_url("<test> which is not a URL.", 1),
2183            "is_in_angle_bracket_url should return false for non-URL angle brackets"
2184        );
2185    }
2186
2187    #[test]
2188    fn test_angle_bracket_mailto_url_not_flagged() {
2189        let config = MD044Config {
2190            names: vec!["Test".to_string()],
2191            ..MD044Config::default()
2192        };
2193        let rule = MD044ProperNames::from_config_struct(config);
2194
2195        let content = "<!-- Contact <mailto:test@example.com> for help -->\n";
2196        let ctx = create_context(content);
2197        let result = rule.check(&ctx).unwrap();
2198
2199        assert!(
2200            result.is_empty(),
2201            "Should not flag names inside angle-bracket mailto URLs: {result:?}"
2202        );
2203    }
2204
2205    #[test]
2206    fn test_angle_bracket_ftp_url_not_flagged() {
2207        let config = MD044Config {
2208            names: vec!["Test".to_string()],
2209            ..MD044Config::default()
2210        };
2211        let rule = MD044ProperNames::from_config_struct(config);
2212
2213        let content = "<!-- Download from <ftp://test.example.com/file> -->\n";
2214        let ctx = create_context(content);
2215        let result = rule.check(&ctx).unwrap();
2216
2217        assert!(
2218            result.is_empty(),
2219            "Should not flag names inside angle-bracket FTP URLs: {result:?}"
2220        );
2221    }
2222
2223    #[test]
2224    fn test_angle_bracket_url_fix_preserves_url() {
2225        // Fix should not modify text inside angle-bracket URLs
2226        let config = MD044Config {
2227            names: vec!["Test".to_string()],
2228            ..MD044Config::default()
2229        };
2230        let rule = MD044ProperNames::from_config_struct(config);
2231
2232        let content = "<!-- test text <https://www.example.test> -->\n";
2233        let ctx = create_context(content);
2234        let fixed = rule.fix(&ctx).unwrap();
2235
2236        // "test" in prose should be fixed, URL should be preserved
2237        assert!(
2238            fixed.contains("<https://www.example.test>"),
2239            "Fix should preserve angle-bracket URLs: {fixed}"
2240        );
2241        assert!(
2242            fixed.contains("Test text"),
2243            "Fix should correct prose 'test' to 'Test': {fixed}"
2244        );
2245    }
2246
2247    #[test]
2248    fn test_is_in_angle_bracket_url_helper() {
2249        // Direct tests of the helper function
2250        let line = "text <https://example.test> more text";
2251
2252        // Inside the URL
2253        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 5)); // '<'
2254        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 6)); // 'h'
2255        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 15)); // middle of URL
2256        assert!(MD044ProperNames::is_in_angle_bracket_url(line, 26)); // '>'
2257
2258        // Outside the URL
2259        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 0)); // 't' at start
2260        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 4)); // space before '<'
2261        assert!(!MD044ProperNames::is_in_angle_bracket_url(line, 27)); // space after '>'
2262
2263        // Non-URL angle brackets
2264        assert!(!MD044ProperNames::is_in_angle_bracket_url("<notaurl>", 1));
2265
2266        // mailto scheme
2267        assert!(MD044ProperNames::is_in_angle_bracket_url(
2268            "<mailto:test@example.com>",
2269            10
2270        ));
2271
2272        // ftp scheme
2273        assert!(MD044ProperNames::is_in_angle_bracket_url(
2274            "<ftp://test.example.com>",
2275            10
2276        ));
2277    }
2278
2279    #[test]
2280    fn test_is_in_angle_bracket_url_uppercase_scheme() {
2281        // RFC 3986: URI schemes are case-insensitive
2282        assert!(MD044ProperNames::is_in_angle_bracket_url(
2283            "<HTTPS://test.example.com>",
2284            10
2285        ));
2286        assert!(MD044ProperNames::is_in_angle_bracket_url(
2287            "<Http://test.example.com>",
2288            10
2289        ));
2290    }
2291
2292    #[test]
2293    fn test_is_in_angle_bracket_url_uncommon_schemes() {
2294        // ssh scheme
2295        assert!(MD044ProperNames::is_in_angle_bracket_url(
2296            "<ssh://test@example.com>",
2297            10
2298        ));
2299        // file scheme
2300        assert!(MD044ProperNames::is_in_angle_bracket_url("<file:///test/path>", 10));
2301        // data scheme (no authority, just colon)
2302        assert!(MD044ProperNames::is_in_angle_bracket_url("<data:text/plain;test>", 10));
2303    }
2304
2305    #[test]
2306    fn test_is_in_angle_bracket_url_unclosed() {
2307        // Unclosed angle bracket should NOT match
2308        assert!(!MD044ProperNames::is_in_angle_bracket_url(
2309            "<https://test.example.com",
2310            10
2311        ));
2312    }
2313}
rumdl_lib/rules/md044_proper_names.rs

rumdl_lib/rules/
md044_proper_names.rs