Skip to main content

rumdl_lib/rules/
md044_proper_names.rs

1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_fancy_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
/// Location of one flagged name: `(line, column, found_name)`.
///
/// `line` is 1-based; `column` is the 1-based byte offset of the match within that line;
/// `found_name` is the text exactly as it appears in the document.
type WarningPosition = (usize, usize, String);
12
13/// Rule MD044: Proper names should be capitalized
14///
15/// See [docs/md044.md](../../docs/md044.md) for full documentation, configuration, and examples.
16///
17/// This rule is triggered when proper names are not capitalized correctly in the document.
18/// For example, if you have defined "JavaScript" as a proper name, the rule will flag any
19/// occurrences of "javascript" or "Javascript" as violations.
20///
21/// ## Purpose
22///
23/// Ensuring consistent capitalization of proper names improves document quality and
24/// professionalism. This is especially important for technical documentation where
25/// product names, programming languages, and technologies often have specific
26/// capitalization conventions.
27///
28/// ## Configuration Options
29///
30/// The rule supports the following configuration options:
31///
32/// ```yaml
33/// MD044:
34///   names: []                # List of proper names to check for correct capitalization
35///   code-blocks: false       # Whether to check code blocks (default: false)
36/// ```
37///
38/// Example configuration:
39///
40/// ```yaml
41/// MD044:
42///   names: ["JavaScript", "Node.js", "TypeScript"]
43///   code-blocks: true
44/// ```
45///
46/// ## Performance Optimizations
47///
48/// This rule implements several performance optimizations:
49///
50/// 1. **Regex Caching**: Pre-compiles and caches regex patterns for each proper name
51/// 2. **Content Caching**: Caches results based on content hashing for repeated checks
52/// 3. **Efficient Text Processing**: Uses optimized algorithms to avoid redundant text processing
53/// 4. **Smart Code Block Detection**: Efficiently identifies and optionally excludes code blocks
54///
55/// ## Edge Cases Handled
56///
57/// - **Word Boundaries**: Only matches complete words, not substrings within other words
58/// - **Case Sensitivity**: Properly handles case-specific matching
59/// - **Code Blocks**: Optionally checks code blocks (controlled by code-blocks setting)
60/// - **Markdown Formatting**: Handles proper names within Markdown formatting elements
61///
62/// ## Fix Behavior
63///
64/// When fixing issues, this rule replaces incorrect capitalization with the correct form
65/// as defined in the configuration.
66///
#[derive(Clone)]
pub struct MD044ProperNames {
    /// Rule settings: the proper names plus the code-block / HTML toggles.
    config: MD044Config,
    /// Source text of the combined case-insensitive regex, or `None` when there is nothing
    /// to match. The compiled regex itself is fetched from a cache at check time.
    combined_pattern: Option<String>,
    /// Lowercased spellings of every configured name (including dot-less and ASCII-folded
    /// forms), used for cheap substring pre-checks before the regex runs.
    name_variants: Vec<String>,
    /// Violations already computed, keyed by a hash of the document content.
    /// Stored behind an `Arc`, so clones of the rule share the same cache.
    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
}
77
78impl MD044ProperNames {
79    pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
80        let config = MD044Config {
81            names,
82            code_blocks,
83            html_elements: true, // Default to checking HTML elements
84            html_comments: true, // Default to checking HTML comments
85        };
86        let combined_pattern = Self::create_combined_pattern(&config);
87        let name_variants = Self::build_name_variants(&config);
88        Self {
89            config,
90            combined_pattern,
91            name_variants,
92            content_cache: Arc::new(Mutex::new(HashMap::new())),
93        }
94    }
95
96    // Helper function for consistent ASCII normalization
97    fn ascii_normalize(s: &str) -> String {
98        s.replace(['é', 'è', 'ê', 'ë'], "e")
99            .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
100            .replace(['ï', 'î', 'í', 'ì'], "i")
101            .replace(['ü', 'ú', 'ù', 'û'], "u")
102            .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
103            .replace('ñ', "n")
104            .replace('ç', "c")
105    }
106
107    pub fn from_config_struct(config: MD044Config) -> Self {
108        let combined_pattern = Self::create_combined_pattern(&config);
109        let name_variants = Self::build_name_variants(&config);
110        Self {
111            config,
112            combined_pattern,
113            name_variants,
114            content_cache: Arc::new(Mutex::new(HashMap::new())),
115        }
116    }
117
118    // Create a combined regex pattern for all proper names
119    fn create_combined_pattern(config: &MD044Config) -> Option<String> {
120        if config.names.is_empty() {
121            return None;
122        }
123
124        // Create patterns for all names and their variations
125        let mut patterns: Vec<String> = config
126            .names
127            .iter()
128            .flat_map(|name| {
129                let mut variations = vec![];
130                let lower_name = name.to_lowercase();
131
132                // Add the lowercase version
133                variations.push(escape_regex(&lower_name));
134
135                // Add version without dots
136                let lower_name_no_dots = lower_name.replace('.', "");
137                if lower_name != lower_name_no_dots {
138                    variations.push(escape_regex(&lower_name_no_dots));
139                }
140
141                // Add ASCII-normalized versions for common accented characters
142                let ascii_normalized = Self::ascii_normalize(&lower_name);
143
144                if ascii_normalized != lower_name {
145                    variations.push(escape_regex(&ascii_normalized));
146
147                    // Also add version without dots
148                    let ascii_no_dots = ascii_normalized.replace('.', "");
149                    if ascii_normalized != ascii_no_dots {
150                        variations.push(escape_regex(&ascii_no_dots));
151                    }
152                }
153
154                variations
155            })
156            .collect();
157
158        // Sort patterns by length (longest first) to avoid shorter patterns matching within longer ones
159        patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
160
161        // Combine all patterns into a single regex with capture groups
162        // Don't use \b as it doesn't work with Unicode - we'll check boundaries manually
163        Some(format!(r"(?i)({})", patterns.join("|")))
164    }
165
166    fn build_name_variants(config: &MD044Config) -> Vec<String> {
167        let mut variants = HashSet::new();
168        for name in &config.names {
169            let lower_name = name.to_lowercase();
170            variants.insert(lower_name.clone());
171
172            let lower_no_dots = lower_name.replace('.', "");
173            if lower_name != lower_no_dots {
174                variants.insert(lower_no_dots);
175            }
176
177            let ascii_normalized = Self::ascii_normalize(&lower_name);
178            if ascii_normalized != lower_name {
179                variants.insert(ascii_normalized.clone());
180
181                let ascii_no_dots = ascii_normalized.replace('.', "");
182                if ascii_normalized != ascii_no_dots {
183                    variants.insert(ascii_no_dots);
184                }
185            }
186        }
187
188        variants.into_iter().collect()
189    }
190
191    // Find all name violations in the content and return positions.
192    // `content_lower` is the pre-computed lowercase version of `content` to avoid redundant allocations.
193    fn find_name_violations(
194        &self,
195        content: &str,
196        ctx: &crate::lint_context::LintContext,
197        content_lower: &str,
198    ) -> Vec<WarningPosition> {
199        // Early return: if no names configured or content is empty
200        if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
201            return Vec::new();
202        }
203
204        // Early return: quick check if any of the configured names might be in content
205        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
206
207        if !has_potential_matches {
208            return Vec::new();
209        }
210
211        // Check if we have cached results
212        let hash = fast_hash(content);
213        {
214            // Use a separate scope for borrowing to minimize lock time
215            if let Ok(cache) = self.content_cache.lock()
216                && let Some(cached) = cache.get(&hash)
217            {
218                return cached.clone();
219            }
220        }
221
222        let mut violations = Vec::new();
223
224        // Get the regex from global cache
225        let combined_regex = match &self.combined_pattern {
226            Some(pattern) => match get_cached_fancy_regex(pattern) {
227                Ok(regex) => regex,
228                Err(_) => return Vec::new(),
229            },
230            None => return Vec::new(),
231        };
232
233        // Use ctx.lines for better performance
234        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
235            let line_num = line_idx + 1;
236            let line = line_info.content(ctx.content);
237
238            // Skip code fence lines (```language or ~~~language)
239            let trimmed = line.trim_start();
240            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
241                continue;
242            }
243
244            // Skip if in code block (when code_blocks = false)
245            if !self.config.code_blocks && line_info.in_code_block {
246                continue;
247            }
248
249            // Skip if in HTML block (when html_elements = false)
250            if !self.config.html_elements && line_info.in_html_block {
251                continue;
252            }
253
254            // Skip HTML comments using pre-computed line flag
255            if !self.config.html_comments && line_info.in_html_comment {
256                continue;
257            }
258
259            // Skip JSX expressions and MDX comments (MDX flavor)
260            if line_info.in_jsx_expression || line_info.in_mdx_comment {
261                continue;
262            }
263
264            // Skip Obsidian comments (Obsidian flavor)
265            if line_info.in_obsidian_comment {
266                continue;
267            }
268
269            // Early return: skip lines that don't contain any potential matches
270            let line_lower = line.to_lowercase();
271            let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
272
273            if !has_line_matches {
274                continue;
275            }
276
277            // Use the combined regex to find all matches in one pass
278            for cap_result in combined_regex.find_iter(line) {
279                match cap_result {
280                    Ok(cap) => {
281                        let found_name = &line[cap.start()..cap.end()];
282
283                        // Check word boundaries manually for Unicode support
284                        let start_pos = cap.start();
285                        let end_pos = cap.end();
286
287                        if !Self::is_at_word_boundary(line, start_pos, true)
288                            || !Self::is_at_word_boundary(line, end_pos, false)
289                        {
290                            continue; // Not at word boundary
291                        }
292
293                        // Skip if in inline code when code_blocks is false
294                        if !self.config.code_blocks {
295                            let byte_pos = line_info.byte_offset + cap.start();
296                            if ctx.is_in_code_block_or_span(byte_pos) {
297                                continue;
298                            }
299                        }
300
301                        // Skip if in link URL or reference definition
302                        let byte_pos = line_info.byte_offset + cap.start();
303                        if Self::is_in_link(ctx, byte_pos) {
304                            continue;
305                        }
306
307                        // Find which proper name this matches
308                        if let Some(proper_name) = self.get_proper_name_for(found_name) {
309                            // Only flag if it's not already correct
310                            if found_name != proper_name {
311                                violations.push((line_num, cap.start() + 1, found_name.to_string()));
312                            }
313                        }
314                    }
315                    Err(e) => {
316                        eprintln!("Regex execution error on line {line_num}: {e}");
317                    }
318                }
319            }
320        }
321
322        // Store in cache (ignore if mutex is poisoned)
323        if let Ok(mut cache) = self.content_cache.lock() {
324            cache.insert(hash, violations.clone());
325        }
326        violations
327    }
328
329    /// Check if a byte position is within a link URL (not link text)
330    ///
331    /// Link text should be checked for proper names, but URLs should be skipped.
332    /// For `[text](url)` - check text, skip url
333    /// For `[text][ref]` - check text, skip reference portion
334    /// For `[[text]]` (WikiLinks) - check text, skip brackets
335    fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
336        use pulldown_cmark::LinkType;
337
338        // Binary search links (sorted by byte_offset) to find candidate containing byte_pos
339        let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
340        if link_idx > 0 {
341            let link = &ctx.links[link_idx - 1];
342            if byte_pos < link.byte_end {
343                // WikiLinks [[text]] start with '[[', regular links [text] start with '['
344                let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
345                    link.byte_offset + 2
346                } else {
347                    link.byte_offset + 1
348                };
349                let text_end = text_start + link.text.len();
350
351                // If position is within the text portion, skip only if text is a URL
352                if byte_pos >= text_start && byte_pos < text_end {
353                    return Self::link_text_is_url(&link.text);
354                }
355                // Position is in the URL/reference portion, skip it
356                return true;
357            }
358        }
359
360        // Binary search images (sorted by byte_offset) to find candidate containing byte_pos
361        let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
362        if image_idx > 0 {
363            let image = &ctx.images[image_idx - 1];
364            if byte_pos < image.byte_end {
365                // Image starts with '![' so alt text starts at byte_offset + 2
366                let alt_start = image.byte_offset + 2;
367                let alt_end = alt_start + image.alt_text.len();
368
369                // If position is within the alt text portion, don't skip
370                if byte_pos >= alt_start && byte_pos < alt_end {
371                    return false;
372                }
373                // Position is in the URL/reference portion, skip it
374                return true;
375            }
376        }
377
378        // Check pre-computed reference definitions
379        ctx.is_in_reference_def(byte_pos)
380    }
381
382    /// Check if link text is a URL that should not have proper name corrections.
383    /// Matches markdownlint behavior: skip text starting with `http://`, `https://`, or `www.`.
384    fn link_text_is_url(text: &str) -> bool {
385        let lower = text.trim().to_ascii_lowercase();
386        lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
387    }
388
389    // Check if a character is a word boundary (handles Unicode)
390    fn is_word_boundary_char(c: char) -> bool {
391        !c.is_alphanumeric()
392    }
393
394    // Check if a character is a word boundary using JavaScript `\b` semantics,
395    // where `_` is treated as a word character (part of JS `\w = [a-zA-Z0-9_]`).
396    // Used for matches inside HTML tags to match markdownlint's `_*\b` pattern behavior.
397    fn is_word_boundary_char_js(c: char) -> bool {
398        !c.is_alphanumeric() && c != '_'
399    }
400
401    // Check if the byte position `pos` is inside an HTML tag (`<...>`).
402    fn is_in_html_tag(line: &str, pos: usize) -> bool {
403        let before = &line[..pos.min(line.len())];
404        match (before.rfind('<'), before.rfind('>')) {
405            (Some(open), Some(close)) => open > close,
406            (Some(_), None) => true,
407            _ => false,
408        }
409    }
410
411    // Check if position is at a word boundary using O(1) byte-level lookups
412    fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
413        // When inside an HTML tag, use JS word boundary semantics (`_` is a word char)
414        // to avoid false positives in HTML attribute values like `test_image`.
415        let boundary_fn: fn(char) -> bool = if Self::is_in_html_tag(content, pos) {
416            Self::is_word_boundary_char_js
417        } else {
418            Self::is_word_boundary_char
419        };
420        if is_start {
421            if pos == 0 {
422                return true;
423            }
424            // Get the character immediately before `pos`
425            match content[..pos].chars().next_back() {
426                None => true,
427                Some(c) => boundary_fn(c),
428            }
429        } else {
430            if pos >= content.len() {
431                return true;
432            }
433            // Get the character at `pos`
434            match content[pos..].chars().next() {
435                None => true,
436                Some(c) => boundary_fn(c),
437            }
438        }
439    }
440
441    // Get the proper name that should be used for a found name
442    fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
443        let found_lower = found_name.to_lowercase();
444
445        // Iterate through the configured proper names
446        for name in &self.config.names {
447            let lower_name = name.to_lowercase();
448            let lower_name_no_dots = lower_name.replace('.', "");
449
450            // Direct match
451            if found_lower == lower_name || found_lower == lower_name_no_dots {
452                return Some(name.clone());
453            }
454
455            // Check ASCII-normalized version
456            let ascii_normalized = Self::ascii_normalize(&lower_name);
457
458            let ascii_no_dots = ascii_normalized.replace('.', "");
459
460            if found_lower == ascii_normalized || found_lower == ascii_no_dots {
461                return Some(name.clone());
462            }
463        }
464        None
465    }
466}
467
468impl Rule for MD044ProperNames {
469    fn name(&self) -> &'static str {
470        "MD044"
471    }
472
473    fn description(&self) -> &'static str {
474        "Proper names should have the correct capitalization"
475    }
476
477    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
478        if self.config.names.is_empty() {
479            return true;
480        }
481        // Quick check if any configured name variants exist (case-insensitive)
482        let content_lower = if ctx.content.is_ascii() {
483            ctx.content.to_ascii_lowercase()
484        } else {
485            ctx.content.to_lowercase()
486        };
487        !self.name_variants.iter().any(|name| content_lower.contains(name))
488    }
489
490    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
491        let content = ctx.content;
492        if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
493            return Ok(Vec::new());
494        }
495
496        // Compute lowercase content once and reuse across all checks
497        let content_lower = if content.is_ascii() {
498            content.to_ascii_lowercase()
499        } else {
500            content.to_lowercase()
501        };
502
503        // Early return: use pre-computed name_variants for the quick check
504        let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
505
506        if !has_potential_matches {
507            return Ok(Vec::new());
508        }
509
510        let line_index = &ctx.line_index;
511        let violations = self.find_name_violations(content, ctx, &content_lower);
512
513        let warnings = violations
514            .into_iter()
515            .filter_map(|(line, column, found_name)| {
516                self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
517                    rule_name: Some(self.name().to_string()),
518                    line,
519                    column,
520                    end_line: line,
521                    end_column: column + found_name.len(),
522                    message: format!("Proper name '{found_name}' should be '{proper_name}'"),
523                    severity: Severity::Warning,
524                    fix: Some(Fix {
525                        range: line_index.line_col_to_byte_range(line, column),
526                        replacement: proper_name,
527                    }),
528                })
529            })
530            .collect();
531
532        Ok(warnings)
533    }
534
535    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
536        let content = ctx.content;
537        if content.is_empty() || self.config.names.is_empty() {
538            return Ok(content.to_string());
539        }
540
541        let content_lower = if content.is_ascii() {
542            content.to_ascii_lowercase()
543        } else {
544            content.to_lowercase()
545        };
546        let violations = self.find_name_violations(content, ctx, &content_lower);
547        if violations.is_empty() {
548            return Ok(content.to_string());
549        }
550
551        // Process lines and build the fixed content
552        let mut fixed_lines = Vec::new();
553
554        // Group violations by line
555        let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
556        for (line_num, col_num, found_name) in violations {
557            violations_by_line
558                .entry(line_num)
559                .or_default()
560                .push((col_num, found_name));
561        }
562
563        // Sort violations within each line in reverse order
564        for violations in violations_by_line.values_mut() {
565            violations.sort_by_key(|b| std::cmp::Reverse(b.0));
566        }
567
568        // Process each line
569        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
570            let line_num = line_idx + 1;
571
572            if let Some(line_violations) = violations_by_line.get(&line_num) {
573                // This line has violations, fix them
574                let mut fixed_line = line_info.content(ctx.content).to_string();
575
576                for (col_num, found_name) in line_violations {
577                    if let Some(proper_name) = self.get_proper_name_for(found_name) {
578                        let start_col = col_num - 1; // Convert to 0-based
579                        let end_col = start_col + found_name.len();
580
581                        if end_col <= fixed_line.len()
582                            && fixed_line.is_char_boundary(start_col)
583                            && fixed_line.is_char_boundary(end_col)
584                        {
585                            fixed_line.replace_range(start_col..end_col, &proper_name);
586                        }
587                    }
588                }
589
590                fixed_lines.push(fixed_line);
591            } else {
592                // No violations on this line, keep it as is
593                fixed_lines.push(line_info.content(ctx.content).to_string());
594            }
595        }
596
597        // Join lines with newlines, preserving the original ending
598        let mut result = fixed_lines.join("\n");
599        if content.ends_with('\n') && !result.ends_with('\n') {
600            result.push('\n');
601        }
602        Ok(result)
603    }
604
605    fn as_any(&self) -> &dyn std::any::Any {
606        self
607    }
608
609    fn default_config_section(&self) -> Option<(String, toml::Value)> {
610        let json_value = serde_json::to_value(&self.config).ok()?;
611        Some((
612            self.name().to_string(),
613            crate::rule_config_serde::json_to_toml_value(&json_value)?,
614        ))
615    }
616
617    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
618    where
619        Self: Sized,
620    {
621        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
622        Box::new(Self::from_config_struct(rule_config))
623    }
624}
625
626#[cfg(test)]
627mod tests {
628    use super::*;
629    use crate::lint_context::LintContext;
630
631    fn create_context(content: &str) -> LintContext<'_> {
632        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
633    }
634
635    #[test]
636    fn test_correctly_capitalized_names() {
637        let rule = MD044ProperNames::new(
638            vec![
639                "JavaScript".to_string(),
640                "TypeScript".to_string(),
641                "Node.js".to_string(),
642            ],
643            true,
644        );
645
646        let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
647        let ctx = create_context(content);
648        let result = rule.check(&ctx).unwrap();
649        assert!(result.is_empty(), "Should not flag correctly capitalized names");
650    }
651
652    #[test]
653    fn test_incorrectly_capitalized_names() {
654        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
655
656        let content = "This document uses javascript and typescript incorrectly.";
657        let ctx = create_context(content);
658        let result = rule.check(&ctx).unwrap();
659
660        assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
661        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
662        assert_eq!(result[0].line, 1);
663        assert_eq!(result[0].column, 20);
664        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
665        assert_eq!(result[1].line, 1);
666        assert_eq!(result[1].column, 35);
667    }
668
669    #[test]
670    fn test_names_at_beginning_of_sentences() {
671        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
672
673        let content = "javascript is a great language. python is also popular.";
674        let ctx = create_context(content);
675        let result = rule.check(&ctx).unwrap();
676
677        assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
678        assert_eq!(result[0].line, 1);
679        assert_eq!(result[0].column, 1);
680        assert_eq!(result[1].line, 1);
681        assert_eq!(result[1].column, 33);
682    }
683
684    #[test]
685    fn test_names_in_code_blocks_checked_by_default() {
686        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
687
688        let content = r#"Here is some text with JavaScript.
689
690```javascript
691// This javascript should be checked
692const lang = "javascript";
693```
694
695But this javascript should be flagged."#;
696
697        let ctx = create_context(content);
698        let result = rule.check(&ctx).unwrap();
699
700        assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
701        assert_eq!(result[0].line, 4);
702        assert_eq!(result[1].line, 5);
703        assert_eq!(result[2].line, 8);
704    }
705
706    #[test]
707    fn test_names_in_code_blocks_ignored_when_disabled() {
708        let rule = MD044ProperNames::new(
709            vec!["JavaScript".to_string()],
710            false, // code_blocks = false means skip code blocks
711        );
712
713        let content = r#"```
714javascript in code block
715```"#;
716
717        let ctx = create_context(content);
718        let result = rule.check(&ctx).unwrap();
719
720        assert_eq!(
721            result.len(),
722            0,
723            "Should not flag javascript in code blocks when code_blocks is false"
724        );
725    }
726
727    #[test]
728    fn test_names_in_inline_code_checked_by_default() {
729        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
730
731        let content = "This is `javascript` in inline code and javascript outside.";
732        let ctx = create_context(content);
733        let result = rule.check(&ctx).unwrap();
734
735        // When code_blocks=true, inline code should be checked
736        assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
737        assert_eq!(result[0].column, 10); // javascript in inline code
738        assert_eq!(result[1].column, 41); // javascript outside
739    }
740
741    #[test]
742    fn test_multiple_names_in_same_line() {
743        let rule = MD044ProperNames::new(
744            vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
745            true,
746        );
747
748        let content = "I use javascript, typescript, and react in my projects.";
749        let ctx = create_context(content);
750        let result = rule.check(&ctx).unwrap();
751
752        assert_eq!(result.len(), 3, "Should flag all three incorrect names");
753        assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
754        assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
755        assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
756    }
757
758    #[test]
759    fn test_case_sensitivity() {
760        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
761
762        let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
763        let ctx = create_context(content);
764        let result = rule.check(&ctx).unwrap();
765
766        assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
767        // JavaScript (correct) should not be flagged
768        assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
769    }
770
771    #[test]
772    fn test_configuration_with_custom_name_list() {
773        let config = MD044Config {
774            names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
775            code_blocks: true,
776            html_elements: true,
777            html_comments: true,
778        };
779        let rule = MD044ProperNames::from_config_struct(config);
780
781        let content = "We use github, gitlab, and devops for our workflow.";
782        let ctx = create_context(content);
783        let result = rule.check(&ctx).unwrap();
784
785        assert_eq!(result.len(), 3, "Should flag all custom names");
786        assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
787        assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
788        assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
789    }
790
791    #[test]
792    fn test_empty_configuration() {
793        let rule = MD044ProperNames::new(vec![], true);
794
795        let content = "This has javascript and typescript but no configured names.";
796        let ctx = create_context(content);
797        let result = rule.check(&ctx).unwrap();
798
799        assert!(result.is_empty(), "Should not flag anything with empty configuration");
800    }
801
802    #[test]
803    fn test_names_with_special_characters() {
804        let rule = MD044ProperNames::new(
805            vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
806            true,
807        );
808
809        let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
810        let ctx = create_context(content);
811        let result = rule.check(&ctx).unwrap();
812
813        // nodejs should match Node.js (dotless variation)
814        // asp.net should be flagged (wrong case)
815        // ASP.NET should not be flagged (correct)
816        // c++ should be flagged
817        assert_eq!(result.len(), 3, "Should handle special characters correctly");
818
819        let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
820        assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
821        assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
822        assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
823    }
824
825    #[test]
826    fn test_word_boundaries() {
827        let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
828
829        let content = "JavaScript is not java or script, but Java and Script are separate.";
830        let ctx = create_context(content);
831        let result = rule.check(&ctx).unwrap();
832
833        // Should only flag lowercase "java" and "script" as separate words
834        assert_eq!(result.len(), 2, "Should respect word boundaries");
835        assert!(result.iter().any(|w| w.column == 19)); // "java" position
836        assert!(result.iter().any(|w| w.column == 27)); // "script" position
837    }
838
839    #[test]
840    fn test_fix_method() {
841        let rule = MD044ProperNames::new(
842            vec![
843                "JavaScript".to_string(),
844                "TypeScript".to_string(),
845                "Node.js".to_string(),
846            ],
847            true,
848        );
849
850        let content = "I love javascript, typescript, and nodejs!";
851        let ctx = create_context(content);
852        let fixed = rule.fix(&ctx).unwrap();
853
854        assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
855    }
856
857    #[test]
858    fn test_fix_multiple_occurrences() {
859        let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
860
861        let content = "python is great. I use python daily. PYTHON is powerful.";
862        let ctx = create_context(content);
863        let fixed = rule.fix(&ctx).unwrap();
864
865        assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
866    }
867
868    #[test]
869    fn test_fix_checks_code_blocks_by_default() {
870        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
871
872        let content = r#"I love javascript.
873
874```
875const lang = "javascript";
876```
877
878More javascript here."#;
879
880        let ctx = create_context(content);
881        let fixed = rule.fix(&ctx).unwrap();
882
883        let expected = r#"I love JavaScript.
884
885```
886const lang = "JavaScript";
887```
888
889More JavaScript here."#;
890
891        assert_eq!(fixed, expected);
892    }
893
894    #[test]
895    fn test_multiline_content() {
896        let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
897
898        let content = r#"First line with rust.
899Second line with python.
900Third line with RUST and PYTHON."#;
901
902        let ctx = create_context(content);
903        let result = rule.check(&ctx).unwrap();
904
905        assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
906        assert_eq!(result[0].line, 1);
907        assert_eq!(result[1].line, 2);
908        assert_eq!(result[2].line, 3);
909        assert_eq!(result[3].line, 3);
910    }
911
912    #[test]
913    fn test_default_config() {
914        let config = MD044Config::default();
915        assert!(config.names.is_empty());
916        assert!(!config.code_blocks); // Default is false (skip code blocks)
917    }
918
919    #[test]
920    fn test_performance_with_many_names() {
921        let mut names = vec![];
922        for i in 0..50 {
923            names.push(format!("ProperName{i}"));
924        }
925
926        let rule = MD044ProperNames::new(names, true);
927
928        let content = "This has propername0, propername25, and propername49 incorrectly.";
929        let ctx = create_context(content);
930        let result = rule.check(&ctx).unwrap();
931
932        assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
933    }
934
935    #[test]
936    fn test_large_name_count_performance() {
937        // Verify MD044 can handle large numbers of names without regex limitations
938        // This test confirms that fancy-regex handles large patterns well
939        let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
940
941        let rule = MD044ProperNames::new(names, true);
942
943        // The combined pattern should be created successfully
944        assert!(rule.combined_pattern.is_some());
945
946        // Should be able to check content without errors
947        let content = "This has propername0 and propername999 in it.";
948        let ctx = create_context(content);
949        let result = rule.check(&ctx).unwrap();
950
951        // Should detect both incorrect names
952        assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
953    }
954
955    #[test]
956    fn test_cache_behavior() {
957        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
958
959        let content = "Using javascript here.";
960        let ctx = create_context(content);
961
962        // First check
963        let result1 = rule.check(&ctx).unwrap();
964        assert_eq!(result1.len(), 1);
965
966        // Second check should use cache
967        let result2 = rule.check(&ctx).unwrap();
968        assert_eq!(result2.len(), 1);
969
970        // Results should be identical
971        assert_eq!(result1[0].line, result2[0].line);
972        assert_eq!(result1[0].column, result2[0].column);
973    }
974
975    #[test]
976    fn test_html_comments_not_checked_when_disabled() {
977        let config = MD044Config {
978            names: vec!["JavaScript".to_string()],
979            code_blocks: true,    // Check code blocks
980            html_elements: true,  // Check HTML elements
981            html_comments: false, // Don't check HTML comments
982        };
983        let rule = MD044ProperNames::from_config_struct(config);
984
985        let content = r#"Regular javascript here.
986<!-- This javascript in HTML comment should be ignored -->
987More javascript outside."#;
988
989        let ctx = create_context(content);
990        let result = rule.check(&ctx).unwrap();
991
992        assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
993        assert_eq!(result[0].line, 1);
994        assert_eq!(result[1].line, 3);
995    }
996
997    #[test]
998    fn test_html_comments_checked_when_enabled() {
999        let config = MD044Config {
1000            names: vec!["JavaScript".to_string()],
1001            code_blocks: true,   // Check code blocks
1002            html_elements: true, // Check HTML elements
1003            html_comments: true, // Check HTML comments
1004        };
1005        let rule = MD044ProperNames::from_config_struct(config);
1006
1007        let content = r#"Regular javascript here.
1008<!-- This javascript in HTML comment should be checked -->
1009More javascript outside."#;
1010
1011        let ctx = create_context(content);
1012        let result = rule.check(&ctx).unwrap();
1013
1014        assert_eq!(
1015            result.len(),
1016            3,
1017            "Should flag all javascript occurrences including in HTML comments"
1018        );
1019    }
1020
1021    #[test]
1022    fn test_multiline_html_comments() {
1023        let config = MD044Config {
1024            names: vec!["Python".to_string(), "JavaScript".to_string()],
1025            code_blocks: true,    // Check code blocks
1026            html_elements: true,  // Check HTML elements
1027            html_comments: false, // Don't check HTML comments
1028        };
1029        let rule = MD044ProperNames::from_config_struct(config);
1030
1031        let content = r#"Regular python here.
1032<!--
1033This is a multiline comment
1034with javascript and python
1035that should be ignored
1036-->
1037More javascript outside."#;
1038
1039        let ctx = create_context(content);
1040        let result = rule.check(&ctx).unwrap();
1041
1042        assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1043        assert_eq!(result[0].line, 1); // python
1044        assert_eq!(result[1].line, 7); // javascript
1045    }
1046
1047    #[test]
1048    fn test_fix_preserves_html_comments_when_disabled() {
1049        let config = MD044Config {
1050            names: vec!["JavaScript".to_string()],
1051            code_blocks: true,    // Check code blocks
1052            html_elements: true,  // Check HTML elements
1053            html_comments: false, // Don't check HTML comments
1054        };
1055        let rule = MD044ProperNames::from_config_struct(config);
1056
1057        let content = r#"javascript here.
1058<!-- javascript in comment -->
1059More javascript."#;
1060
1061        let ctx = create_context(content);
1062        let fixed = rule.fix(&ctx).unwrap();
1063
1064        let expected = r#"JavaScript here.
1065<!-- javascript in comment -->
1066More JavaScript."#;
1067
1068        assert_eq!(
1069            fixed, expected,
1070            "Should not fix names inside HTML comments when disabled"
1071        );
1072    }
1073
1074    #[test]
1075    fn test_proper_names_in_link_text_are_flagged() {
1076        let rule = MD044ProperNames::new(
1077            vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1078            true,
1079        );
1080
1081        let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1082
1083Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1084
1085Real javascript should be flagged.
1086
1087Also see the [typescript guide][ts-ref] for more.
1088
1089Real python should be flagged too.
1090
1091[ts-ref]: https://typescript.org/handbook"#;
1092
1093        let ctx = create_context(content);
1094        let result = rule.check(&ctx).unwrap();
1095
1096        // Link text should be checked, URLs should not be checked
1097        // Line 1: [javascript documentation] - "javascript" should be flagged
1098        // Line 3: [node.js homepage] - "node.js" should be flagged (matches "Node.js")
1099        // Line 3: [python tutorial] - "python" should be flagged
1100        // Line 5: standalone javascript
1101        // Line 9: standalone python
1102        assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1103
1104        // Verify line numbers for link text warnings
1105        let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1106        assert_eq!(line_1_warnings.len(), 1);
1107        assert!(
1108            line_1_warnings[0]
1109                .message
1110                .contains("'javascript' should be 'JavaScript'")
1111        );
1112
1113        let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1114        assert_eq!(line_3_warnings.len(), 2); // node.js and python
1115
1116        // Standalone warnings
1117        assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1118        assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1119    }
1120
1121    #[test]
1122    fn test_link_urls_not_flagged() {
1123        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1124
1125        // URL contains "javascript" but should NOT be flagged
1126        let content = r#"[Link Text](https://javascript.info/guide)"#;
1127
1128        let ctx = create_context(content);
1129        let result = rule.check(&ctx).unwrap();
1130
1131        // URL should not be checked
1132        assert!(result.is_empty(), "URLs should not be checked for proper names");
1133    }
1134
1135    #[test]
1136    fn test_proper_names_in_image_alt_text_are_flagged() {
1137        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1138
1139        let content = r#"Here is a ![javascript logo](javascript.png "javascript icon") image.
1140
1141Real javascript should be flagged."#;
1142
1143        let ctx = create_context(content);
1144        let result = rule.check(&ctx).unwrap();
1145
1146        // Image alt text should be checked, URL and title should not be checked
1147        // Line 1: ![javascript logo] - "javascript" should be flagged
1148        // Line 3: standalone javascript
1149        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1150        assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1151        assert!(result[0].line == 1); // "![javascript logo]"
1152        assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1153        assert!(result[1].line == 3); // "Real javascript should be flagged."
1154    }
1155
1156    #[test]
1157    fn test_image_urls_not_flagged() {
1158        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1159
1160        // URL contains "javascript" but should NOT be flagged
1161        let content = r#"![Logo](https://javascript.info/logo.png)"#;
1162
1163        let ctx = create_context(content);
1164        let result = rule.check(&ctx).unwrap();
1165
1166        // Image URL should not be checked
1167        assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1168    }
1169
1170    #[test]
1171    fn test_reference_link_text_flagged_but_definition_not() {
1172        let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1173
1174        let content = r#"Check the [javascript guide][js-ref] for details.
1175
1176Real javascript should be flagged.
1177
1178[js-ref]: https://javascript.info/typescript/guide"#;
1179
1180        let ctx = create_context(content);
1181        let result = rule.check(&ctx).unwrap();
1182
1183        // Link text should be checked, reference definitions should not
1184        // Line 1: [javascript guide] - should be flagged
1185        // Line 3: standalone javascript - should be flagged
1186        // Line 5: reference definition - should NOT be flagged
1187        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1188        assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1189        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1190    }
1191
1192    #[test]
1193    fn test_reference_definitions_not_flagged() {
1194        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1195
1196        // Reference definition should NOT be flagged
1197        let content = r#"[js-ref]: https://javascript.info/guide"#;
1198
1199        let ctx = create_context(content);
1200        let result = rule.check(&ctx).unwrap();
1201
1202        // Reference definition URLs should not be checked
1203        assert!(result.is_empty(), "Reference definitions should not be checked");
1204    }
1205
1206    #[test]
1207    fn test_wikilinks_text_is_flagged() {
1208        let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1209
1210        // WikiLinks [[destination]] should have their text checked
1211        let content = r#"[[javascript]]
1212
1213Regular javascript here.
1214
1215[[JavaScript|display text]]"#;
1216
1217        let ctx = create_context(content);
1218        let result = rule.check(&ctx).unwrap();
1219
1220        // Line 1: [[javascript]] - should be flagged (WikiLink text)
1221        // Line 3: standalone javascript - should be flagged
1222        // Line 5: [[JavaScript|display text]] - correct capitalization, no flag
1223        assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1224        assert!(
1225            result
1226                .iter()
1227                .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1228        );
1229        assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1230    }
1231
1232    #[test]
1233    fn test_url_link_text_not_flagged() {
1234        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1235
1236        // Link text that is itself a URL should not be flagged
1237        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1238
1239[http://github.com/org/repo](http://github.com/org/repo)
1240
1241[www.github.com/org/repo](https://www.github.com/org/repo)"#;
1242
1243        let ctx = create_context(content);
1244        let result = rule.check(&ctx).unwrap();
1245
1246        assert!(
1247            result.is_empty(),
1248            "URL-like link text should not be flagged, got: {result:?}"
1249        );
1250    }
1251
1252    #[test]
1253    fn test_url_link_text_with_leading_space_not_flagged() {
1254        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1255
1256        // Leading/trailing whitespace in link text should be trimmed before URL check
1257        let content = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
1258
1259        let ctx = create_context(content);
1260        let result = rule.check(&ctx).unwrap();
1261
1262        assert!(
1263            result.is_empty(),
1264            "URL-like link text with leading space should not be flagged, got: {result:?}"
1265        );
1266    }
1267
1268    #[test]
1269    fn test_url_link_text_uppercase_scheme_not_flagged() {
1270        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1271
1272        let content = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
1273
1274        let ctx = create_context(content);
1275        let result = rule.check(&ctx).unwrap();
1276
1277        assert!(
1278            result.is_empty(),
1279            "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
1280        );
1281    }
1282
1283    #[test]
1284    fn test_non_url_link_text_still_flagged() {
1285        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1286
1287        // Link text that is NOT a URL should still be flagged
1288        let content = r#"[github.com/org/repo](https://github.com/org/repo)
1289
1290[Visit github](https://github.com/org/repo)
1291
1292[//github.com/org/repo](//github.com/org/repo)
1293
1294[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
1295
1296        let ctx = create_context(content);
1297        let result = rule.check(&ctx).unwrap();
1298
1299        assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
1300        assert!(result.iter().any(|w| w.line == 1)); // github.com (no protocol)
1301        assert!(result.iter().any(|w| w.line == 3)); // Visit github
1302        assert!(result.iter().any(|w| w.line == 5)); // //github.com (protocol-relative)
1303        assert!(result.iter().any(|w| w.line == 7)); // ftp://github.com
1304    }
1305
1306    #[test]
1307    fn test_url_link_text_fix_not_applied() {
1308        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1309
1310        let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";
1311
1312        let ctx = create_context(content);
1313        let result = rule.fix(&ctx).unwrap();
1314
1315        assert_eq!(result, content, "Fix should not modify URL-like link text");
1316    }
1317
1318    #[test]
1319    fn test_mixed_url_and_regular_link_text() {
1320        let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1321
1322        // Mix of URL link text (should skip) and regular text (should flag)
1323        let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1324
1325Visit [github documentation](https://github.com/docs) for details.
1326
1327[www.github.com/pricing](https://www.github.com/pricing)"#;
1328
1329        let ctx = create_context(content);
1330        let result = rule.check(&ctx).unwrap();
1331
1332        // Only line 3 should be flagged ("github documentation" is not a URL)
1333        assert_eq!(
1334            result.len(),
1335            1,
1336            "Only non-URL link text should be flagged, got: {result:?}"
1337        );
1338        assert_eq!(result[0].line, 3);
1339    }
1340
1341    #[test]
1342    fn test_html_attribute_underscore_no_false_positive() {
1343        // Regression test for issue #443:
1344        // Names in HTML attributes should not be flagged when adjacent to underscores.
1345        // HTML attribute values treat `_` as a word character (JS `\b` semantics),
1346        // so `test` in `test_image` should NOT match.
1347        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1348        let content = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
1349        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1350        let result = rule.check(&ctx).unwrap();
1351
1352        // Only line 3 (plain "test") and line 5 col 23 (".test/" - no underscore after) should fire
1353        // NOT line 5 col 28 ("test_image" - underscore after test)
1354        let line5_violations: Vec<_> = result.iter().filter(|w| w.line == 5).collect();
1355        assert_eq!(
1356            line5_violations.len(),
1357            1,
1358            "Should flag only 'test' in 'example.test/' not in 'test_image': {line5_violations:?}"
1359        );
1360        assert_eq!(line5_violations[0].column, 23, "Should flag col 23 (example.test)");
1361
1362        // Verify plain text line is still flagged
1363        let line3_violations: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1364        assert_eq!(line3_violations.len(), 1, "Plain 'test' on line 3 should be flagged");
1365    }
1366
1367    #[test]
1368    fn test_html_attribute_underscore_prefix_no_false_positive() {
1369        // Names with underscore prefix in HTML attributes (like data-test_id) should not
1370        // be flagged when the name is adjacent to underscores.
1371        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1372        let content = "# Heading\n\n<span data-test_id=\"value\">Test content</span>\n";
1373        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1374        let result = rule.check(&ctx).unwrap();
1375
1376        // "data-test_id" → "test" preceded by "-" (boundary) but followed by "_" → no flag in HTML
1377        // "Test content" → already correct capitalization → no flag
1378        assert!(
1379            result.is_empty(),
1380            "Should not flag 'test' in 'data-test_id' inside HTML or 'Test' that is already correct: {result:?}"
1381        );
1382    }
1383
1384    #[test]
1385    fn test_html_attribute_name_in_url_still_flagged() {
1386        // Names in HTML attribute URLs that are NOT adjacent to underscores should still be flagged.
1387        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1388        let content = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
1389        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1390        let result = rule.check(&ctx).unwrap();
1391
1392        // "example.test" → "test" preceded by "." (boundary) and followed by "/" (boundary) → flagged
1393        // "test link" is outside the HTML tag (between tags) → treated as plain text → flagged
1394        assert_eq!(
1395            result.len(),
1396            2,
1397            "Should flag 'test' in href URL and in anchor text: {result:?}"
1398        );
1399        let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
1400        assert!(
1401            cols.contains(&26),
1402            "Should flag col 26 (example.test in href): {cols:?}"
1403        );
1404        assert!(
1405            cols.contains(&37),
1406            "Should flag col 37 (test link in anchor text): {cols:?}"
1407        );
1408    }
1409
1410    #[test]
1411    fn test_plain_text_underscore_boundary_unchanged() {
1412        // Plain text (outside HTML tags) still uses original word boundary semantics where
1413        // underscore is a boundary character, matching markdownlint's behavior via AST splitting.
1414        let rule = MD044ProperNames::new(vec!["Test".to_string()], true);
1415        let content = "# Heading\n\ntest_image is here and just_test ends here\n";
1416        let ctx = crate::lint_context::LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1417        let result = rule.check(&ctx).unwrap();
1418
1419        // Both "test_image" (test at start) and "just_test" (test at end) are flagged
1420        // because in plain text, "_" is a word boundary
1421        assert_eq!(
1422            result.len(),
1423            2,
1424            "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
1425        );
1426        let cols: Vec<usize> = result.iter().map(|w| w.column).collect();
1427        assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
1428        assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
1429    }
1430}