1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_fancy_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use fancy_regex::Regex;
6use std::collections::{HashMap, HashSet};
7use std::sync::LazyLock;
8use std::sync::{Arc, Mutex};
9
10mod md044_config;
11use md044_config::MD044Config;
12
/// Matches one complete HTML comment (`<!-- ... -->`), non-greedily and across line breaks.
static HTML_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<!--([\s\S]*?)-->").unwrap());
/// Matches a reference definition line: up to three leading spaces, `[label]:`,
/// a whitespace-free destination, and an optional single- or double-quoted title.
static REF_DEF_REGEX: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap()
});
18
/// One recorded violation: 1-based line number, 1-based byte column within that
/// line, and the text exactly as it was found.
type WarningPosition = (usize, usize, String);

/// MD044: reports configured proper names that are written with the wrong capitalization.
#[derive(Clone)]
pub struct MD044ProperNames {
    /// Rule settings: the name list and which regions are checked.
    config: MD044Config,
    /// Source of the case-insensitive alternation over all name variants;
    /// `None` when no names are configured.
    combined_pattern: Option<String>,
    /// Lowercased name variants used for cheap substring pre-filtering.
    name_variants: Vec<String>,
    /// Violations keyed by content hash, so identical content is scanned only once.
    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
}
85
86impl MD044ProperNames {
87 pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
88 let config = MD044Config {
89 names,
90 code_blocks,
91 html_elements: true, html_comments: true, };
94 let combined_pattern = Self::create_combined_pattern(&config);
95 let name_variants = Self::build_name_variants(&config);
96 Self {
97 config,
98 combined_pattern,
99 name_variants,
100 content_cache: Arc::new(Mutex::new(HashMap::new())),
101 }
102 }
103
104 fn ascii_normalize(s: &str) -> String {
106 s.replace(['é', 'è', 'ê', 'ë'], "e")
107 .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
108 .replace(['ï', 'î', 'í', 'ì'], "i")
109 .replace(['ü', 'ú', 'ù', 'û'], "u")
110 .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
111 .replace('ñ', "n")
112 .replace('ç', "c")
113 }
114
115 pub fn from_config_struct(config: MD044Config) -> Self {
116 let combined_pattern = Self::create_combined_pattern(&config);
117 let name_variants = Self::build_name_variants(&config);
118 Self {
119 config,
120 combined_pattern,
121 name_variants,
122 content_cache: Arc::new(Mutex::new(HashMap::new())),
123 }
124 }
125
126 fn create_combined_pattern(config: &MD044Config) -> Option<String> {
128 if config.names.is_empty() {
129 return None;
130 }
131
132 let mut patterns: Vec<String> = config
134 .names
135 .iter()
136 .flat_map(|name| {
137 let mut variations = vec![];
138 let lower_name = name.to_lowercase();
139
140 variations.push(escape_regex(&lower_name));
142
143 let lower_name_no_dots = lower_name.replace('.', "");
145 if lower_name != lower_name_no_dots {
146 variations.push(escape_regex(&lower_name_no_dots));
147 }
148
149 let ascii_normalized = Self::ascii_normalize(&lower_name);
151
152 if ascii_normalized != lower_name {
153 variations.push(escape_regex(&ascii_normalized));
154
155 let ascii_no_dots = ascii_normalized.replace('.', "");
157 if ascii_normalized != ascii_no_dots {
158 variations.push(escape_regex(&ascii_no_dots));
159 }
160 }
161
162 variations
163 })
164 .collect();
165
166 patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
168
169 Some(format!(r"(?i)({})", patterns.join("|")))
172 }
173
174 fn build_name_variants(config: &MD044Config) -> Vec<String> {
175 let mut variants = HashSet::new();
176 for name in &config.names {
177 let lower_name = name.to_lowercase();
178 variants.insert(lower_name.clone());
179
180 let lower_no_dots = lower_name.replace('.', "");
181 if lower_name != lower_no_dots {
182 variants.insert(lower_no_dots);
183 }
184
185 let ascii_normalized = Self::ascii_normalize(&lower_name);
186 if ascii_normalized != lower_name {
187 variants.insert(ascii_normalized.clone());
188
189 let ascii_no_dots = ascii_normalized.replace('.', "");
190 if ascii_normalized != ascii_no_dots {
191 variants.insert(ascii_no_dots);
192 }
193 }
194 }
195
196 variants.into_iter().collect()
197 }
198
199 fn find_name_violations(&self, content: &str, ctx: &crate::lint_context::LintContext) -> Vec<WarningPosition> {
201 if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
203 return Vec::new();
204 }
205
206 let content_lower = if content.is_ascii() {
208 content.to_ascii_lowercase()
209 } else {
210 content.to_lowercase()
211 };
212 let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
213
214 if !has_potential_matches {
215 return Vec::new();
216 }
217
218 let hash = fast_hash(content);
220 {
221 if let Ok(cache) = self.content_cache.lock()
223 && let Some(cached) = cache.get(&hash)
224 {
225 return cached.clone();
226 }
227 }
228
229 let mut violations = Vec::new();
230
231 let combined_regex = match &self.combined_pattern {
233 Some(pattern) => match get_cached_fancy_regex(pattern) {
234 Ok(regex) => regex,
235 Err(_) => return Vec::new(),
236 },
237 None => return Vec::new(),
238 };
239
240 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
242 let line_num = line_idx + 1;
243 let line = line_info.content(ctx.content);
244
245 let trimmed = line.trim_start();
247 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
248 continue;
249 }
250
251 if !self.config.code_blocks && line_info.in_code_block {
253 continue;
254 }
255
256 if !self.config.html_elements && line_info.in_html_block {
258 continue;
259 }
260
261 let in_html_comment = if !self.config.html_comments {
263 self.is_in_html_comment(content, line_info.byte_offset)
265 } else {
266 false
267 };
268
269 if in_html_comment {
270 continue;
271 }
272
273 if line_info.in_jsx_expression || line_info.in_mdx_comment {
275 continue;
276 }
277
278 if line_info.in_obsidian_comment {
280 continue;
281 }
282
283 let line_lower = line.to_lowercase();
285 let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
286
287 if !has_line_matches {
288 continue;
289 }
290
291 for cap_result in combined_regex.find_iter(line) {
293 match cap_result {
294 Ok(cap) => {
295 let found_name = &line[cap.start()..cap.end()];
296
297 let start_pos = cap.start();
299 let end_pos = cap.end();
300
301 if !self.is_at_word_boundary(line, start_pos, true)
302 || !self.is_at_word_boundary(line, end_pos, false)
303 {
304 continue; }
306
307 if !self.config.code_blocks {
309 let byte_pos = line_info.byte_offset + cap.start();
310 if ctx.is_in_code_block_or_span(byte_pos) {
311 continue;
312 }
313 }
314
315 let byte_pos = line_info.byte_offset + cap.start();
317 if self.is_in_link(ctx, byte_pos) {
318 continue;
319 }
320
321 if let Some(proper_name) = self.get_proper_name_for(found_name) {
323 if found_name != proper_name {
325 violations.push((line_num, cap.start() + 1, found_name.to_string()));
326 }
327 }
328 }
329 Err(e) => {
330 eprintln!("Regex execution error on line {line_num}: {e}");
331 }
332 }
333 }
334 }
335
336 if let Ok(mut cache) = self.content_cache.lock() {
338 cache.insert(hash, violations.clone());
339 }
340 violations
341 }
342
343 fn is_in_html_comment(&self, content: &str, byte_pos: usize) -> bool {
345 for m in HTML_COMMENT_REGEX.find_iter(content).flatten() {
346 if m.start() <= byte_pos && byte_pos < m.end() {
347 return true;
348 }
349 }
350 false
351 }
352
353 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
360 use pulldown_cmark::LinkType;
361
362 for link in &ctx.links {
364 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
365 let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
369 link.byte_offset + 2
370 } else {
371 link.byte_offset + 1
372 };
373 let text_end = text_start + link.text.len();
374
375 if byte_pos >= text_start && byte_pos < text_end {
377 return false;
378 }
379
380 return true;
382 }
383 }
384
385 for image in &ctx.images {
387 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
388 let alt_start = image.byte_offset + 2;
390 let alt_end = alt_start + image.alt_text.len();
391
392 if byte_pos >= alt_start && byte_pos < alt_end {
394 return false;
395 }
396
397 return true;
399 }
400 }
401
402 for m in REF_DEF_REGEX.find_iter(ctx.content) {
405 if m.start() <= byte_pos && byte_pos < m.end() {
406 return true;
407 }
408 }
409
410 false
411 }
412
413 fn is_word_boundary_char(c: char) -> bool {
415 !c.is_alphanumeric()
416 }
417
418 fn is_at_word_boundary(&self, content: &str, pos: usize, is_start: bool) -> bool {
420 let chars: Vec<char> = content.chars().collect();
421 let char_indices: Vec<(usize, char)> = content.char_indices().collect();
422
423 let char_pos = char_indices.iter().position(|(idx, _)| *idx == pos);
425 if char_pos.is_none() {
426 return true; }
428 let char_pos = char_pos.unwrap();
429
430 if is_start {
431 if char_pos == 0 {
433 return true; }
435 Self::is_word_boundary_char(chars[char_pos - 1])
436 } else {
437 if char_pos >= chars.len() {
439 return true; }
441 Self::is_word_boundary_char(chars[char_pos])
442 }
443 }
444
445 fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
447 let found_lower = found_name.to_lowercase();
448
449 for name in &self.config.names {
451 let lower_name = name.to_lowercase();
452 let lower_name_no_dots = lower_name.replace('.', "");
453
454 if found_lower == lower_name || found_lower == lower_name_no_dots {
456 return Some(name.clone());
457 }
458
459 let ascii_normalized = Self::ascii_normalize(&lower_name);
461
462 let ascii_no_dots = ascii_normalized.replace('.', "");
463
464 if found_lower == ascii_normalized || found_lower == ascii_no_dots {
465 return Some(name.clone());
466 }
467 }
468 None
469 }
470}
471
472impl Rule for MD044ProperNames {
    /// Returns the rule identifier, `"MD044"`.
    fn name(&self) -> &'static str {
        "MD044"
    }
476
    /// Returns the one-line, human-readable description of the rule.
    fn description(&self) -> &'static str {
        "Proper names should have the correct capitalization"
    }
480
481 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
482 if self.config.names.is_empty() {
483 return true;
484 }
485 let content_lower = ctx.content.to_lowercase();
487 !self
488 .config
489 .names
490 .iter()
491 .any(|name| content_lower.contains(&name.to_lowercase()))
492 }
493
494 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
495 let content = ctx.content;
496 if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
497 return Ok(Vec::new());
498 }
499
500 let content_lower = content.to_lowercase();
502 let has_potential_matches = self.config.names.iter().any(|name| {
503 let name_lower = name.to_lowercase();
504 let name_no_dots = name_lower.replace('.', "");
505
506 if content_lower.contains(&name_lower) || content_lower.contains(&name_no_dots) {
508 return true;
509 }
510
511 let ascii_normalized = Self::ascii_normalize(&name_lower);
513
514 if ascii_normalized != name_lower {
515 if content_lower.contains(&ascii_normalized) {
516 return true;
517 }
518 let ascii_no_dots = ascii_normalized.replace('.', "");
519 if ascii_normalized != ascii_no_dots && content_lower.contains(&ascii_no_dots) {
520 return true;
521 }
522 }
523
524 false
525 });
526
527 if !has_potential_matches {
528 return Ok(Vec::new());
529 }
530
531 let line_index = &ctx.line_index;
532 let violations = self.find_name_violations(content, ctx);
533
534 let warnings = violations
535 .into_iter()
536 .filter_map(|(line, column, found_name)| {
537 self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
538 rule_name: Some(self.name().to_string()),
539 line,
540 column,
541 end_line: line,
542 end_column: column + found_name.len(),
543 message: format!("Proper name '{found_name}' should be '{proper_name}'"),
544 severity: Severity::Warning,
545 fix: Some(Fix {
546 range: line_index.line_col_to_byte_range(line, column),
547 replacement: proper_name,
548 }),
549 })
550 })
551 .collect();
552
553 Ok(warnings)
554 }
555
556 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
557 let content = ctx.content;
558 if content.is_empty() || self.config.names.is_empty() {
559 return Ok(content.to_string());
560 }
561
562 let violations = self.find_name_violations(content, ctx);
563 if violations.is_empty() {
564 return Ok(content.to_string());
565 }
566
567 let mut fixed_lines = Vec::new();
569
570 let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
572 for (line_num, col_num, found_name) in violations {
573 violations_by_line
574 .entry(line_num)
575 .or_default()
576 .push((col_num, found_name));
577 }
578
579 for violations in violations_by_line.values_mut() {
581 violations.sort_by_key(|b| std::cmp::Reverse(b.0));
582 }
583
584 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
586 let line_num = line_idx + 1;
587
588 if let Some(line_violations) = violations_by_line.get(&line_num) {
589 let mut fixed_line = line_info.content(ctx.content).to_string();
591
592 for (col_num, found_name) in line_violations {
593 if let Some(proper_name) = self.get_proper_name_for(found_name) {
594 let start_col = col_num - 1; let end_col = start_col + found_name.len();
596
597 if end_col <= fixed_line.len()
598 && fixed_line.is_char_boundary(start_col)
599 && fixed_line.is_char_boundary(end_col)
600 {
601 fixed_line.replace_range(start_col..end_col, &proper_name);
602 }
603 }
604 }
605
606 fixed_lines.push(fixed_line);
607 } else {
608 fixed_lines.push(line_info.content(ctx.content).to_string());
610 }
611 }
612
613 let mut result = fixed_lines.join("\n");
615 if content.ends_with('\n') && !result.ends_with('\n') {
616 result.push('\n');
617 }
618 Ok(result)
619 }
620
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
624
625 fn default_config_section(&self) -> Option<(String, toml::Value)> {
626 let json_value = serde_json::to_value(&self.config).ok()?;
627 Some((
628 self.name().to_string(),
629 crate::rule_config_serde::json_to_toml_value(&json_value)?,
630 ))
631 }
632
    /// Builds a boxed instance of the rule from the global configuration by
    /// loading this rule's `MD044Config` through `load_rule_config`.
    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
        Box::new(Self::from_config_struct(rule_config))
    }
640}
641
642#[cfg(test)]
643mod tests {
644 use super::*;
645 use crate::lint_context::LintContext;
646
    /// Test helper: wraps `content` in a `LintContext` using the standard
    /// Markdown flavor and no further argument (`None`).
    fn create_context(content: &str) -> LintContext<'_> {
        LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
    }
650
651 #[test]
652 fn test_correctly_capitalized_names() {
653 let rule = MD044ProperNames::new(
654 vec![
655 "JavaScript".to_string(),
656 "TypeScript".to_string(),
657 "Node.js".to_string(),
658 ],
659 true,
660 );
661
662 let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
663 let ctx = create_context(content);
664 let result = rule.check(&ctx).unwrap();
665 assert!(result.is_empty(), "Should not flag correctly capitalized names");
666 }
667
668 #[test]
669 fn test_incorrectly_capitalized_names() {
670 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
671
672 let content = "This document uses javascript and typescript incorrectly.";
673 let ctx = create_context(content);
674 let result = rule.check(&ctx).unwrap();
675
676 assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
677 assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
678 assert_eq!(result[0].line, 1);
679 assert_eq!(result[0].column, 20);
680 assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
681 assert_eq!(result[1].line, 1);
682 assert_eq!(result[1].column, 35);
683 }
684
685 #[test]
686 fn test_names_at_beginning_of_sentences() {
687 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
688
689 let content = "javascript is a great language. python is also popular.";
690 let ctx = create_context(content);
691 let result = rule.check(&ctx).unwrap();
692
693 assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
694 assert_eq!(result[0].line, 1);
695 assert_eq!(result[0].column, 1);
696 assert_eq!(result[1].line, 1);
697 assert_eq!(result[1].column, 33);
698 }
699
700 #[test]
701 fn test_names_in_code_blocks_checked_by_default() {
702 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
703
704 let content = r#"Here is some text with JavaScript.
705
706```javascript
707// This javascript should be checked
708const lang = "javascript";
709```
710
711But this javascript should be flagged."#;
712
713 let ctx = create_context(content);
714 let result = rule.check(&ctx).unwrap();
715
716 assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
717 assert_eq!(result[0].line, 4);
718 assert_eq!(result[1].line, 5);
719 assert_eq!(result[2].line, 8);
720 }
721
722 #[test]
723 fn test_names_in_code_blocks_ignored_when_disabled() {
724 let rule = MD044ProperNames::new(
725 vec!["JavaScript".to_string()],
726 false, );
728
729 let content = r#"```
730javascript in code block
731```"#;
732
733 let ctx = create_context(content);
734 let result = rule.check(&ctx).unwrap();
735
736 assert_eq!(
737 result.len(),
738 0,
739 "Should not flag javascript in code blocks when code_blocks is false"
740 );
741 }
742
743 #[test]
744 fn test_names_in_inline_code_checked_by_default() {
745 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
746
747 let content = "This is `javascript` in inline code and javascript outside.";
748 let ctx = create_context(content);
749 let result = rule.check(&ctx).unwrap();
750
751 assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
753 assert_eq!(result[0].column, 10); assert_eq!(result[1].column, 41); }
756
757 #[test]
758 fn test_multiple_names_in_same_line() {
759 let rule = MD044ProperNames::new(
760 vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
761 true,
762 );
763
764 let content = "I use javascript, typescript, and react in my projects.";
765 let ctx = create_context(content);
766 let result = rule.check(&ctx).unwrap();
767
768 assert_eq!(result.len(), 3, "Should flag all three incorrect names");
769 assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
770 assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
771 assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
772 }
773
774 #[test]
775 fn test_case_sensitivity() {
776 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
777
778 let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
779 let ctx = create_context(content);
780 let result = rule.check(&ctx).unwrap();
781
782 assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
783 assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
785 }
786
787 #[test]
788 fn test_configuration_with_custom_name_list() {
789 let config = MD044Config {
790 names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
791 code_blocks: true,
792 html_elements: true,
793 html_comments: true,
794 };
795 let rule = MD044ProperNames::from_config_struct(config);
796
797 let content = "We use github, gitlab, and devops for our workflow.";
798 let ctx = create_context(content);
799 let result = rule.check(&ctx).unwrap();
800
801 assert_eq!(result.len(), 3, "Should flag all custom names");
802 assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
803 assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
804 assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
805 }
806
807 #[test]
808 fn test_empty_configuration() {
809 let rule = MD044ProperNames::new(vec![], true);
810
811 let content = "This has javascript and typescript but no configured names.";
812 let ctx = create_context(content);
813 let result = rule.check(&ctx).unwrap();
814
815 assert!(result.is_empty(), "Should not flag anything with empty configuration");
816 }
817
818 #[test]
819 fn test_names_with_special_characters() {
820 let rule = MD044ProperNames::new(
821 vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
822 true,
823 );
824
825 let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
826 let ctx = create_context(content);
827 let result = rule.check(&ctx).unwrap();
828
829 assert_eq!(result.len(), 3, "Should handle special characters correctly");
834
835 let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
836 assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
837 assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
838 assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
839 }
840
841 #[test]
842 fn test_word_boundaries() {
843 let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
844
845 let content = "JavaScript is not java or script, but Java and Script are separate.";
846 let ctx = create_context(content);
847 let result = rule.check(&ctx).unwrap();
848
849 assert_eq!(result.len(), 2, "Should respect word boundaries");
851 assert!(result.iter().any(|w| w.column == 19)); assert!(result.iter().any(|w| w.column == 27)); }
854
855 #[test]
856 fn test_fix_method() {
857 let rule = MD044ProperNames::new(
858 vec![
859 "JavaScript".to_string(),
860 "TypeScript".to_string(),
861 "Node.js".to_string(),
862 ],
863 true,
864 );
865
866 let content = "I love javascript, typescript, and nodejs!";
867 let ctx = create_context(content);
868 let fixed = rule.fix(&ctx).unwrap();
869
870 assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
871 }
872
873 #[test]
874 fn test_fix_multiple_occurrences() {
875 let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
876
877 let content = "python is great. I use python daily. PYTHON is powerful.";
878 let ctx = create_context(content);
879 let fixed = rule.fix(&ctx).unwrap();
880
881 assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
882 }
883
884 #[test]
885 fn test_fix_checks_code_blocks_by_default() {
886 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
887
888 let content = r#"I love javascript.
889
890```
891const lang = "javascript";
892```
893
894More javascript here."#;
895
896 let ctx = create_context(content);
897 let fixed = rule.fix(&ctx).unwrap();
898
899 let expected = r#"I love JavaScript.
900
901```
902const lang = "JavaScript";
903```
904
905More JavaScript here."#;
906
907 assert_eq!(fixed, expected);
908 }
909
910 #[test]
911 fn test_multiline_content() {
912 let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
913
914 let content = r#"First line with rust.
915Second line with python.
916Third line with RUST and PYTHON."#;
917
918 let ctx = create_context(content);
919 let result = rule.check(&ctx).unwrap();
920
921 assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
922 assert_eq!(result[0].line, 1);
923 assert_eq!(result[1].line, 2);
924 assert_eq!(result[2].line, 3);
925 assert_eq!(result[3].line, 3);
926 }
927
928 #[test]
929 fn test_default_config() {
930 let config = MD044Config::default();
931 assert!(config.names.is_empty());
932 assert!(!config.code_blocks); }
934
935 #[test]
936 fn test_performance_with_many_names() {
937 let mut names = vec![];
938 for i in 0..50 {
939 names.push(format!("ProperName{i}"));
940 }
941
942 let rule = MD044ProperNames::new(names, true);
943
944 let content = "This has propername0, propername25, and propername49 incorrectly.";
945 let ctx = create_context(content);
946 let result = rule.check(&ctx).unwrap();
947
948 assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
949 }
950
951 #[test]
952 fn test_large_name_count_performance() {
953 let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
956
957 let rule = MD044ProperNames::new(names, true);
958
959 assert!(rule.combined_pattern.is_some());
961
962 let content = "This has propername0 and propername999 in it.";
964 let ctx = create_context(content);
965 let result = rule.check(&ctx).unwrap();
966
967 assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
969 }
970
971 #[test]
972 fn test_cache_behavior() {
973 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
974
975 let content = "Using javascript here.";
976 let ctx = create_context(content);
977
978 let result1 = rule.check(&ctx).unwrap();
980 assert_eq!(result1.len(), 1);
981
982 let result2 = rule.check(&ctx).unwrap();
984 assert_eq!(result2.len(), 1);
985
986 assert_eq!(result1[0].line, result2[0].line);
988 assert_eq!(result1[0].column, result2[0].column);
989 }
990
991 #[test]
992 fn test_html_comments_not_checked_when_disabled() {
993 let config = MD044Config {
994 names: vec!["JavaScript".to_string()],
995 code_blocks: true, html_elements: true, html_comments: false, };
999 let rule = MD044ProperNames::from_config_struct(config);
1000
1001 let content = r#"Regular javascript here.
1002<!-- This javascript in HTML comment should be ignored -->
1003More javascript outside."#;
1004
1005 let ctx = create_context(content);
1006 let result = rule.check(&ctx).unwrap();
1007
1008 assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
1009 assert_eq!(result[0].line, 1);
1010 assert_eq!(result[1].line, 3);
1011 }
1012
1013 #[test]
1014 fn test_html_comments_checked_when_enabled() {
1015 let config = MD044Config {
1016 names: vec!["JavaScript".to_string()],
1017 code_blocks: true, html_elements: true, html_comments: true, };
1021 let rule = MD044ProperNames::from_config_struct(config);
1022
1023 let content = r#"Regular javascript here.
1024<!-- This javascript in HTML comment should be checked -->
1025More javascript outside."#;
1026
1027 let ctx = create_context(content);
1028 let result = rule.check(&ctx).unwrap();
1029
1030 assert_eq!(
1031 result.len(),
1032 3,
1033 "Should flag all javascript occurrences including in HTML comments"
1034 );
1035 }
1036
1037 #[test]
1038 fn test_multiline_html_comments() {
1039 let config = MD044Config {
1040 names: vec!["Python".to_string(), "JavaScript".to_string()],
1041 code_blocks: true, html_elements: true, html_comments: false, };
1045 let rule = MD044ProperNames::from_config_struct(config);
1046
1047 let content = r#"Regular python here.
1048<!--
1049This is a multiline comment
1050with javascript and python
1051that should be ignored
1052-->
1053More javascript outside."#;
1054
1055 let ctx = create_context(content);
1056 let result = rule.check(&ctx).unwrap();
1057
1058 assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1059 assert_eq!(result[0].line, 1); assert_eq!(result[1].line, 7); }
1062
1063 #[test]
1064 fn test_fix_preserves_html_comments_when_disabled() {
1065 let config = MD044Config {
1066 names: vec!["JavaScript".to_string()],
1067 code_blocks: true, html_elements: true, html_comments: false, };
1071 let rule = MD044ProperNames::from_config_struct(config);
1072
1073 let content = r#"javascript here.
1074<!-- javascript in comment -->
1075More javascript."#;
1076
1077 let ctx = create_context(content);
1078 let fixed = rule.fix(&ctx).unwrap();
1079
1080 let expected = r#"JavaScript here.
1081<!-- javascript in comment -->
1082More JavaScript."#;
1083
1084 assert_eq!(
1085 fixed, expected,
1086 "Should not fix names inside HTML comments when disabled"
1087 );
1088 }
1089
1090 #[test]
1091 fn test_proper_names_in_link_text_are_flagged() {
1092 let rule = MD044ProperNames::new(
1093 vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1094 true,
1095 );
1096
1097 let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1098
1099Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1100
1101Real javascript should be flagged.
1102
1103Also see the [typescript guide][ts-ref] for more.
1104
1105Real python should be flagged too.
1106
1107[ts-ref]: https://typescript.org/handbook"#;
1108
1109 let ctx = create_context(content);
1110 let result = rule.check(&ctx).unwrap();
1111
1112 assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1119
1120 let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1122 assert_eq!(line_1_warnings.len(), 1);
1123 assert!(
1124 line_1_warnings[0]
1125 .message
1126 .contains("'javascript' should be 'JavaScript'")
1127 );
1128
1129 let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1130 assert_eq!(line_3_warnings.len(), 2); assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1134 assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1135 }
1136
1137 #[test]
1138 fn test_link_urls_not_flagged() {
1139 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1140
1141 let content = r#"[Link Text](https://javascript.info/guide)"#;
1143
1144 let ctx = create_context(content);
1145 let result = rule.check(&ctx).unwrap();
1146
1147 assert!(result.is_empty(), "URLs should not be checked for proper names");
1149 }
1150
1151 #[test]
1152 fn test_proper_names_in_image_alt_text_are_flagged() {
1153 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1154
1155 let content = r#"Here is a  image.
1156
1157Real javascript should be flagged."#;
1158
1159 let ctx = create_context(content);
1160 let result = rule.check(&ctx).unwrap();
1161
1162 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1166 assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1167 assert!(result[0].line == 1); assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1169 assert!(result[1].line == 3); }
1171
1172 #[test]
1173 fn test_image_urls_not_flagged() {
1174 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1175
1176 let content = r#""#;
1178
1179 let ctx = create_context(content);
1180 let result = rule.check(&ctx).unwrap();
1181
1182 assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1184 }
1185
1186 #[test]
1187 fn test_reference_link_text_flagged_but_definition_not() {
1188 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1189
1190 let content = r#"Check the [javascript guide][js-ref] for details.
1191
1192Real javascript should be flagged.
1193
1194[js-ref]: https://javascript.info/typescript/guide"#;
1195
1196 let ctx = create_context(content);
1197 let result = rule.check(&ctx).unwrap();
1198
1199 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1204 assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1205 assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1206 }
1207
1208 #[test]
1209 fn test_reference_definitions_not_flagged() {
1210 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1211
1212 let content = r#"[js-ref]: https://javascript.info/guide"#;
1214
1215 let ctx = create_context(content);
1216 let result = rule.check(&ctx).unwrap();
1217
1218 assert!(result.is_empty(), "Reference definitions should not be checked");
1220 }
1221
1222 #[test]
1223 fn test_wikilinks_text_is_flagged() {
1224 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1225
1226 let content = r#"[[javascript]]
1228
1229Regular javascript here.
1230
1231[[JavaScript|display text]]"#;
1232
1233 let ctx = create_context(content);
1234 let result = rule.check(&ctx).unwrap();
1235
1236 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1240 assert!(
1241 result
1242 .iter()
1243 .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1244 );
1245 assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1246 }
1247}