1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_fancy_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9use md044_config::MD044Config;
10
/// One detected violation: `(line, column, matched text)`.
///
/// `line` is 1-based; `column` is the 1-based byte offset of the match within
/// its line; the text is the name exactly as it was written in the document.
type WarningPosition = (usize, usize, String);

/// MD044: proper names should use their configured capitalization.
#[derive(Clone)]
pub struct MD044ProperNames {
    /// Rule configuration (names plus the code-block / HTML toggles).
    config: MD044Config,
    /// Case-insensitive alternation over every name variant; `None` when no
    /// names are configured.
    combined_pattern: Option<String>,
    /// Lowercased name variants, used for cheap substring pre-checks before
    /// the regex is run.
    name_variants: Vec<String>,
    /// Violations keyed by a hash of the document content. Held behind an
    /// `Arc`, so clones of the rule share one cache.
    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
}
77
78impl MD044ProperNames {
79 pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
80 let config = MD044Config {
81 names,
82 code_blocks,
83 html_elements: true, html_comments: true, };
86 let combined_pattern = Self::create_combined_pattern(&config);
87 let name_variants = Self::build_name_variants(&config);
88 Self {
89 config,
90 combined_pattern,
91 name_variants,
92 content_cache: Arc::new(Mutex::new(HashMap::new())),
93 }
94 }
95
96 fn ascii_normalize(s: &str) -> String {
98 s.replace(['é', 'è', 'ê', 'ë'], "e")
99 .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
100 .replace(['ï', 'î', 'í', 'ì'], "i")
101 .replace(['ü', 'ú', 'ù', 'û'], "u")
102 .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
103 .replace('ñ', "n")
104 .replace('ç', "c")
105 }
106
107 pub fn from_config_struct(config: MD044Config) -> Self {
108 let combined_pattern = Self::create_combined_pattern(&config);
109 let name_variants = Self::build_name_variants(&config);
110 Self {
111 config,
112 combined_pattern,
113 name_variants,
114 content_cache: Arc::new(Mutex::new(HashMap::new())),
115 }
116 }
117
118 fn create_combined_pattern(config: &MD044Config) -> Option<String> {
120 if config.names.is_empty() {
121 return None;
122 }
123
124 let mut patterns: Vec<String> = config
126 .names
127 .iter()
128 .flat_map(|name| {
129 let mut variations = vec![];
130 let lower_name = name.to_lowercase();
131
132 variations.push(escape_regex(&lower_name));
134
135 let lower_name_no_dots = lower_name.replace('.', "");
137 if lower_name != lower_name_no_dots {
138 variations.push(escape_regex(&lower_name_no_dots));
139 }
140
141 let ascii_normalized = Self::ascii_normalize(&lower_name);
143
144 if ascii_normalized != lower_name {
145 variations.push(escape_regex(&ascii_normalized));
146
147 let ascii_no_dots = ascii_normalized.replace('.', "");
149 if ascii_normalized != ascii_no_dots {
150 variations.push(escape_regex(&ascii_no_dots));
151 }
152 }
153
154 variations
155 })
156 .collect();
157
158 patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
160
161 Some(format!(r"(?i)({})", patterns.join("|")))
164 }
165
166 fn build_name_variants(config: &MD044Config) -> Vec<String> {
167 let mut variants = HashSet::new();
168 for name in &config.names {
169 let lower_name = name.to_lowercase();
170 variants.insert(lower_name.clone());
171
172 let lower_no_dots = lower_name.replace('.', "");
173 if lower_name != lower_no_dots {
174 variants.insert(lower_no_dots);
175 }
176
177 let ascii_normalized = Self::ascii_normalize(&lower_name);
178 if ascii_normalized != lower_name {
179 variants.insert(ascii_normalized.clone());
180
181 let ascii_no_dots = ascii_normalized.replace('.', "");
182 if ascii_normalized != ascii_no_dots {
183 variants.insert(ascii_no_dots);
184 }
185 }
186 }
187
188 variants.into_iter().collect()
189 }
190
191 fn find_name_violations(
194 &self,
195 content: &str,
196 ctx: &crate::lint_context::LintContext,
197 content_lower: &str,
198 ) -> Vec<WarningPosition> {
199 if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
201 return Vec::new();
202 }
203
204 let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
206
207 if !has_potential_matches {
208 return Vec::new();
209 }
210
211 let hash = fast_hash(content);
213 {
214 if let Ok(cache) = self.content_cache.lock()
216 && let Some(cached) = cache.get(&hash)
217 {
218 return cached.clone();
219 }
220 }
221
222 let mut violations = Vec::new();
223
224 let combined_regex = match &self.combined_pattern {
226 Some(pattern) => match get_cached_fancy_regex(pattern) {
227 Ok(regex) => regex,
228 Err(_) => return Vec::new(),
229 },
230 None => return Vec::new(),
231 };
232
233 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
235 let line_num = line_idx + 1;
236 let line = line_info.content(ctx.content);
237
238 let trimmed = line.trim_start();
240 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
241 continue;
242 }
243
244 if !self.config.code_blocks && line_info.in_code_block {
246 continue;
247 }
248
249 if !self.config.html_elements && line_info.in_html_block {
251 continue;
252 }
253
254 if !self.config.html_comments && line_info.in_html_comment {
256 continue;
257 }
258
259 if line_info.in_jsx_expression || line_info.in_mdx_comment {
261 continue;
262 }
263
264 if line_info.in_obsidian_comment {
266 continue;
267 }
268
269 let line_lower = line.to_lowercase();
271 let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
272
273 if !has_line_matches {
274 continue;
275 }
276
277 for cap_result in combined_regex.find_iter(line) {
279 match cap_result {
280 Ok(cap) => {
281 let found_name = &line[cap.start()..cap.end()];
282
283 let start_pos = cap.start();
285 let end_pos = cap.end();
286
287 if !Self::is_at_word_boundary(line, start_pos, true)
288 || !Self::is_at_word_boundary(line, end_pos, false)
289 {
290 continue; }
292
293 if !self.config.code_blocks {
295 let byte_pos = line_info.byte_offset + cap.start();
296 if ctx.is_in_code_block_or_span(byte_pos) {
297 continue;
298 }
299 }
300
301 let byte_pos = line_info.byte_offset + cap.start();
303 if Self::is_in_link(ctx, byte_pos) {
304 continue;
305 }
306
307 if let Some(proper_name) = self.get_proper_name_for(found_name) {
309 if found_name != proper_name {
311 violations.push((line_num, cap.start() + 1, found_name.to_string()));
312 }
313 }
314 }
315 Err(e) => {
316 eprintln!("Regex execution error on line {line_num}: {e}");
317 }
318 }
319 }
320 }
321
322 if let Ok(mut cache) = self.content_cache.lock() {
324 cache.insert(hash, violations.clone());
325 }
326 violations
327 }
328
329 fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
336 use pulldown_cmark::LinkType;
337
338 let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
340 if link_idx > 0 {
341 let link = &ctx.links[link_idx - 1];
342 if byte_pos < link.byte_end {
343 let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
345 link.byte_offset + 2
346 } else {
347 link.byte_offset + 1
348 };
349 let text_end = text_start + link.text.len();
350
351 if byte_pos >= text_start && byte_pos < text_end {
353 return Self::link_text_is_url(&link.text);
354 }
355 return true;
357 }
358 }
359
360 let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
362 if image_idx > 0 {
363 let image = &ctx.images[image_idx - 1];
364 if byte_pos < image.byte_end {
365 let alt_start = image.byte_offset + 2;
367 let alt_end = alt_start + image.alt_text.len();
368
369 if byte_pos >= alt_start && byte_pos < alt_end {
371 return false;
372 }
373 return true;
375 }
376 }
377
378 ctx.is_in_reference_def(byte_pos)
380 }
381
382 fn link_text_is_url(text: &str) -> bool {
385 let lower = text.trim().to_ascii_lowercase();
386 lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
387 }
388
389 fn is_word_boundary_char(c: char) -> bool {
391 !c.is_alphanumeric()
392 }
393
394 fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
396 if is_start {
397 if pos == 0 {
398 return true;
399 }
400 match content[..pos].chars().next_back() {
402 None => true,
403 Some(c) => Self::is_word_boundary_char(c),
404 }
405 } else {
406 if pos >= content.len() {
407 return true;
408 }
409 match content[pos..].chars().next() {
411 None => true,
412 Some(c) => Self::is_word_boundary_char(c),
413 }
414 }
415 }
416
417 fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
419 let found_lower = found_name.to_lowercase();
420
421 for name in &self.config.names {
423 let lower_name = name.to_lowercase();
424 let lower_name_no_dots = lower_name.replace('.', "");
425
426 if found_lower == lower_name || found_lower == lower_name_no_dots {
428 return Some(name.clone());
429 }
430
431 let ascii_normalized = Self::ascii_normalize(&lower_name);
433
434 let ascii_no_dots = ascii_normalized.replace('.', "");
435
436 if found_lower == ascii_normalized || found_lower == ascii_no_dots {
437 return Some(name.clone());
438 }
439 }
440 None
441 }
442}
443
444impl Rule for MD044ProperNames {
    /// Returns the rule identifier, `"MD044"`.
    fn name(&self) -> &'static str {
        "MD044"
    }

    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Proper names should have the correct capitalization"
    }
452
453 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
454 if self.config.names.is_empty() {
455 return true;
456 }
457 let content_lower = if ctx.content.is_ascii() {
459 ctx.content.to_ascii_lowercase()
460 } else {
461 ctx.content.to_lowercase()
462 };
463 !self.name_variants.iter().any(|name| content_lower.contains(name))
464 }
465
466 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
467 let content = ctx.content;
468 if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
469 return Ok(Vec::new());
470 }
471
472 let content_lower = if content.is_ascii() {
474 content.to_ascii_lowercase()
475 } else {
476 content.to_lowercase()
477 };
478
479 let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
481
482 if !has_potential_matches {
483 return Ok(Vec::new());
484 }
485
486 let line_index = &ctx.line_index;
487 let violations = self.find_name_violations(content, ctx, &content_lower);
488
489 let warnings = violations
490 .into_iter()
491 .filter_map(|(line, column, found_name)| {
492 self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
493 rule_name: Some(self.name().to_string()),
494 line,
495 column,
496 end_line: line,
497 end_column: column + found_name.len(),
498 message: format!("Proper name '{found_name}' should be '{proper_name}'"),
499 severity: Severity::Warning,
500 fix: Some(Fix {
501 range: line_index.line_col_to_byte_range(line, column),
502 replacement: proper_name,
503 }),
504 })
505 })
506 .collect();
507
508 Ok(warnings)
509 }
510
511 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
512 let content = ctx.content;
513 if content.is_empty() || self.config.names.is_empty() {
514 return Ok(content.to_string());
515 }
516
517 let content_lower = if content.is_ascii() {
518 content.to_ascii_lowercase()
519 } else {
520 content.to_lowercase()
521 };
522 let violations = self.find_name_violations(content, ctx, &content_lower);
523 if violations.is_empty() {
524 return Ok(content.to_string());
525 }
526
527 let mut fixed_lines = Vec::new();
529
530 let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
532 for (line_num, col_num, found_name) in violations {
533 violations_by_line
534 .entry(line_num)
535 .or_default()
536 .push((col_num, found_name));
537 }
538
539 for violations in violations_by_line.values_mut() {
541 violations.sort_by_key(|b| std::cmp::Reverse(b.0));
542 }
543
544 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
546 let line_num = line_idx + 1;
547
548 if let Some(line_violations) = violations_by_line.get(&line_num) {
549 let mut fixed_line = line_info.content(ctx.content).to_string();
551
552 for (col_num, found_name) in line_violations {
553 if let Some(proper_name) = self.get_proper_name_for(found_name) {
554 let start_col = col_num - 1; let end_col = start_col + found_name.len();
556
557 if end_col <= fixed_line.len()
558 && fixed_line.is_char_boundary(start_col)
559 && fixed_line.is_char_boundary(end_col)
560 {
561 fixed_line.replace_range(start_col..end_col, &proper_name);
562 }
563 }
564 }
565
566 fixed_lines.push(fixed_line);
567 } else {
568 fixed_lines.push(line_info.content(ctx.content).to_string());
570 }
571 }
572
573 let mut result = fixed_lines.join("\n");
575 if content.ends_with('\n') && !result.ends_with('\n') {
576 result.push('\n');
577 }
578 Ok(result)
579 }
580
    /// Exposes the rule as `&dyn Any` so callers can downcast it to the
    /// concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
584
585 fn default_config_section(&self) -> Option<(String, toml::Value)> {
586 let json_value = serde_json::to_value(&self.config).ok()?;
587 Some((
588 self.name().to_string(),
589 crate::rule_config_serde::json_to_toml_value(&json_value)?,
590 ))
591 }
592
593 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
594 where
595 Self: Sized,
596 {
597 let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
598 Box::new(Self::from_config_struct(rule_config))
599 }
600}
601
602#[cfg(test)]
603mod tests {
604 use super::*;
605 use crate::lint_context::LintContext;
606
607 fn create_context(content: &str) -> LintContext<'_> {
608 LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
609 }
610
611 #[test]
612 fn test_correctly_capitalized_names() {
613 let rule = MD044ProperNames::new(
614 vec![
615 "JavaScript".to_string(),
616 "TypeScript".to_string(),
617 "Node.js".to_string(),
618 ],
619 true,
620 );
621
622 let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
623 let ctx = create_context(content);
624 let result = rule.check(&ctx).unwrap();
625 assert!(result.is_empty(), "Should not flag correctly capitalized names");
626 }
627
628 #[test]
629 fn test_incorrectly_capitalized_names() {
630 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
631
632 let content = "This document uses javascript and typescript incorrectly.";
633 let ctx = create_context(content);
634 let result = rule.check(&ctx).unwrap();
635
636 assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
637 assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
638 assert_eq!(result[0].line, 1);
639 assert_eq!(result[0].column, 20);
640 assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
641 assert_eq!(result[1].line, 1);
642 assert_eq!(result[1].column, 35);
643 }
644
645 #[test]
646 fn test_names_at_beginning_of_sentences() {
647 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
648
649 let content = "javascript is a great language. python is also popular.";
650 let ctx = create_context(content);
651 let result = rule.check(&ctx).unwrap();
652
653 assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
654 assert_eq!(result[0].line, 1);
655 assert_eq!(result[0].column, 1);
656 assert_eq!(result[1].line, 1);
657 assert_eq!(result[1].column, 33);
658 }
659
660 #[test]
661 fn test_names_in_code_blocks_checked_by_default() {
662 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
663
664 let content = r#"Here is some text with JavaScript.
665
666```javascript
667// This javascript should be checked
668const lang = "javascript";
669```
670
671But this javascript should be flagged."#;
672
673 let ctx = create_context(content);
674 let result = rule.check(&ctx).unwrap();
675
676 assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
677 assert_eq!(result[0].line, 4);
678 assert_eq!(result[1].line, 5);
679 assert_eq!(result[2].line, 8);
680 }
681
682 #[test]
683 fn test_names_in_code_blocks_ignored_when_disabled() {
684 let rule = MD044ProperNames::new(
685 vec!["JavaScript".to_string()],
686 false, );
688
689 let content = r#"```
690javascript in code block
691```"#;
692
693 let ctx = create_context(content);
694 let result = rule.check(&ctx).unwrap();
695
696 assert_eq!(
697 result.len(),
698 0,
699 "Should not flag javascript in code blocks when code_blocks is false"
700 );
701 }
702
703 #[test]
704 fn test_names_in_inline_code_checked_by_default() {
705 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
706
707 let content = "This is `javascript` in inline code and javascript outside.";
708 let ctx = create_context(content);
709 let result = rule.check(&ctx).unwrap();
710
711 assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
713 assert_eq!(result[0].column, 10); assert_eq!(result[1].column, 41); }
716
717 #[test]
718 fn test_multiple_names_in_same_line() {
719 let rule = MD044ProperNames::new(
720 vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
721 true,
722 );
723
724 let content = "I use javascript, typescript, and react in my projects.";
725 let ctx = create_context(content);
726 let result = rule.check(&ctx).unwrap();
727
728 assert_eq!(result.len(), 3, "Should flag all three incorrect names");
729 assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
730 assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
731 assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
732 }
733
734 #[test]
735 fn test_case_sensitivity() {
736 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
737
738 let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
739 let ctx = create_context(content);
740 let result = rule.check(&ctx).unwrap();
741
742 assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
743 assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
745 }
746
747 #[test]
748 fn test_configuration_with_custom_name_list() {
749 let config = MD044Config {
750 names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
751 code_blocks: true,
752 html_elements: true,
753 html_comments: true,
754 };
755 let rule = MD044ProperNames::from_config_struct(config);
756
757 let content = "We use github, gitlab, and devops for our workflow.";
758 let ctx = create_context(content);
759 let result = rule.check(&ctx).unwrap();
760
761 assert_eq!(result.len(), 3, "Should flag all custom names");
762 assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
763 assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
764 assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
765 }
766
767 #[test]
768 fn test_empty_configuration() {
769 let rule = MD044ProperNames::new(vec![], true);
770
771 let content = "This has javascript and typescript but no configured names.";
772 let ctx = create_context(content);
773 let result = rule.check(&ctx).unwrap();
774
775 assert!(result.is_empty(), "Should not flag anything with empty configuration");
776 }
777
778 #[test]
779 fn test_names_with_special_characters() {
780 let rule = MD044ProperNames::new(
781 vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
782 true,
783 );
784
785 let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
786 let ctx = create_context(content);
787 let result = rule.check(&ctx).unwrap();
788
789 assert_eq!(result.len(), 3, "Should handle special characters correctly");
794
795 let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
796 assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
797 assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
798 assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
799 }
800
801 #[test]
802 fn test_word_boundaries() {
803 let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
804
805 let content = "JavaScript is not java or script, but Java and Script are separate.";
806 let ctx = create_context(content);
807 let result = rule.check(&ctx).unwrap();
808
809 assert_eq!(result.len(), 2, "Should respect word boundaries");
811 assert!(result.iter().any(|w| w.column == 19)); assert!(result.iter().any(|w| w.column == 27)); }
814
815 #[test]
816 fn test_fix_method() {
817 let rule = MD044ProperNames::new(
818 vec![
819 "JavaScript".to_string(),
820 "TypeScript".to_string(),
821 "Node.js".to_string(),
822 ],
823 true,
824 );
825
826 let content = "I love javascript, typescript, and nodejs!";
827 let ctx = create_context(content);
828 let fixed = rule.fix(&ctx).unwrap();
829
830 assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
831 }
832
833 #[test]
834 fn test_fix_multiple_occurrences() {
835 let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
836
837 let content = "python is great. I use python daily. PYTHON is powerful.";
838 let ctx = create_context(content);
839 let fixed = rule.fix(&ctx).unwrap();
840
841 assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
842 }
843
844 #[test]
845 fn test_fix_checks_code_blocks_by_default() {
846 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
847
848 let content = r#"I love javascript.
849
850```
851const lang = "javascript";
852```
853
854More javascript here."#;
855
856 let ctx = create_context(content);
857 let fixed = rule.fix(&ctx).unwrap();
858
859 let expected = r#"I love JavaScript.
860
861```
862const lang = "JavaScript";
863```
864
865More JavaScript here."#;
866
867 assert_eq!(fixed, expected);
868 }
869
870 #[test]
871 fn test_multiline_content() {
872 let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
873
874 let content = r#"First line with rust.
875Second line with python.
876Third line with RUST and PYTHON."#;
877
878 let ctx = create_context(content);
879 let result = rule.check(&ctx).unwrap();
880
881 assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
882 assert_eq!(result[0].line, 1);
883 assert_eq!(result[1].line, 2);
884 assert_eq!(result[2].line, 3);
885 assert_eq!(result[3].line, 3);
886 }
887
888 #[test]
889 fn test_default_config() {
890 let config = MD044Config::default();
891 assert!(config.names.is_empty());
892 assert!(!config.code_blocks); }
894
895 #[test]
896 fn test_performance_with_many_names() {
897 let mut names = vec![];
898 for i in 0..50 {
899 names.push(format!("ProperName{i}"));
900 }
901
902 let rule = MD044ProperNames::new(names, true);
903
904 let content = "This has propername0, propername25, and propername49 incorrectly.";
905 let ctx = create_context(content);
906 let result = rule.check(&ctx).unwrap();
907
908 assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
909 }
910
911 #[test]
912 fn test_large_name_count_performance() {
913 let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
916
917 let rule = MD044ProperNames::new(names, true);
918
919 assert!(rule.combined_pattern.is_some());
921
922 let content = "This has propername0 and propername999 in it.";
924 let ctx = create_context(content);
925 let result = rule.check(&ctx).unwrap();
926
927 assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
929 }
930
931 #[test]
932 fn test_cache_behavior() {
933 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
934
935 let content = "Using javascript here.";
936 let ctx = create_context(content);
937
938 let result1 = rule.check(&ctx).unwrap();
940 assert_eq!(result1.len(), 1);
941
942 let result2 = rule.check(&ctx).unwrap();
944 assert_eq!(result2.len(), 1);
945
946 assert_eq!(result1[0].line, result2[0].line);
948 assert_eq!(result1[0].column, result2[0].column);
949 }
950
951 #[test]
952 fn test_html_comments_not_checked_when_disabled() {
953 let config = MD044Config {
954 names: vec!["JavaScript".to_string()],
955 code_blocks: true, html_elements: true, html_comments: false, };
959 let rule = MD044ProperNames::from_config_struct(config);
960
961 let content = r#"Regular javascript here.
962<!-- This javascript in HTML comment should be ignored -->
963More javascript outside."#;
964
965 let ctx = create_context(content);
966 let result = rule.check(&ctx).unwrap();
967
968 assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
969 assert_eq!(result[0].line, 1);
970 assert_eq!(result[1].line, 3);
971 }
972
973 #[test]
974 fn test_html_comments_checked_when_enabled() {
975 let config = MD044Config {
976 names: vec!["JavaScript".to_string()],
977 code_blocks: true, html_elements: true, html_comments: true, };
981 let rule = MD044ProperNames::from_config_struct(config);
982
983 let content = r#"Regular javascript here.
984<!-- This javascript in HTML comment should be checked -->
985More javascript outside."#;
986
987 let ctx = create_context(content);
988 let result = rule.check(&ctx).unwrap();
989
990 assert_eq!(
991 result.len(),
992 3,
993 "Should flag all javascript occurrences including in HTML comments"
994 );
995 }
996
997 #[test]
998 fn test_multiline_html_comments() {
999 let config = MD044Config {
1000 names: vec!["Python".to_string(), "JavaScript".to_string()],
1001 code_blocks: true, html_elements: true, html_comments: false, };
1005 let rule = MD044ProperNames::from_config_struct(config);
1006
1007 let content = r#"Regular python here.
1008<!--
1009This is a multiline comment
1010with javascript and python
1011that should be ignored
1012-->
1013More javascript outside."#;
1014
1015 let ctx = create_context(content);
1016 let result = rule.check(&ctx).unwrap();
1017
1018 assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1019 assert_eq!(result[0].line, 1); assert_eq!(result[1].line, 7); }
1022
1023 #[test]
1024 fn test_fix_preserves_html_comments_when_disabled() {
1025 let config = MD044Config {
1026 names: vec!["JavaScript".to_string()],
1027 code_blocks: true, html_elements: true, html_comments: false, };
1031 let rule = MD044ProperNames::from_config_struct(config);
1032
1033 let content = r#"javascript here.
1034<!-- javascript in comment -->
1035More javascript."#;
1036
1037 let ctx = create_context(content);
1038 let fixed = rule.fix(&ctx).unwrap();
1039
1040 let expected = r#"JavaScript here.
1041<!-- javascript in comment -->
1042More JavaScript."#;
1043
1044 assert_eq!(
1045 fixed, expected,
1046 "Should not fix names inside HTML comments when disabled"
1047 );
1048 }
1049
1050 #[test]
1051 fn test_proper_names_in_link_text_are_flagged() {
1052 let rule = MD044ProperNames::new(
1053 vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1054 true,
1055 );
1056
1057 let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1058
1059Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1060
1061Real javascript should be flagged.
1062
1063Also see the [typescript guide][ts-ref] for more.
1064
1065Real python should be flagged too.
1066
1067[ts-ref]: https://typescript.org/handbook"#;
1068
1069 let ctx = create_context(content);
1070 let result = rule.check(&ctx).unwrap();
1071
1072 assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1079
1080 let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1082 assert_eq!(line_1_warnings.len(), 1);
1083 assert!(
1084 line_1_warnings[0]
1085 .message
1086 .contains("'javascript' should be 'JavaScript'")
1087 );
1088
1089 let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1090 assert_eq!(line_3_warnings.len(), 2); assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1094 assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1095 }
1096
1097 #[test]
1098 fn test_link_urls_not_flagged() {
1099 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1100
1101 let content = r#"[Link Text](https://javascript.info/guide)"#;
1103
1104 let ctx = create_context(content);
1105 let result = rule.check(&ctx).unwrap();
1106
1107 assert!(result.is_empty(), "URLs should not be checked for proper names");
1109 }
1110
1111 #[test]
1112 fn test_proper_names_in_image_alt_text_are_flagged() {
1113 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1114
1115 let content = r#"Here is a  image.
1116
1117Real javascript should be flagged."#;
1118
1119 let ctx = create_context(content);
1120 let result = rule.check(&ctx).unwrap();
1121
1122 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1126 assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1127 assert!(result[0].line == 1); assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1129 assert!(result[1].line == 3); }
1131
1132 #[test]
1133 fn test_image_urls_not_flagged() {
1134 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1135
1136 let content = r#""#;
1138
1139 let ctx = create_context(content);
1140 let result = rule.check(&ctx).unwrap();
1141
1142 assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1144 }
1145
1146 #[test]
1147 fn test_reference_link_text_flagged_but_definition_not() {
1148 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1149
1150 let content = r#"Check the [javascript guide][js-ref] for details.
1151
1152Real javascript should be flagged.
1153
1154[js-ref]: https://javascript.info/typescript/guide"#;
1155
1156 let ctx = create_context(content);
1157 let result = rule.check(&ctx).unwrap();
1158
1159 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1164 assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1165 assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1166 }
1167
1168 #[test]
1169 fn test_reference_definitions_not_flagged() {
1170 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1171
1172 let content = r#"[js-ref]: https://javascript.info/guide"#;
1174
1175 let ctx = create_context(content);
1176 let result = rule.check(&ctx).unwrap();
1177
1178 assert!(result.is_empty(), "Reference definitions should not be checked");
1180 }
1181
1182 #[test]
1183 fn test_wikilinks_text_is_flagged() {
1184 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1185
1186 let content = r#"[[javascript]]
1188
1189Regular javascript here.
1190
1191[[JavaScript|display text]]"#;
1192
1193 let ctx = create_context(content);
1194 let result = rule.check(&ctx).unwrap();
1195
1196 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1200 assert!(
1201 result
1202 .iter()
1203 .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1204 );
1205 assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1206 }
1207
1208 #[test]
1209 fn test_url_link_text_not_flagged() {
1210 let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1211
1212 let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1214
1215[http://github.com/org/repo](http://github.com/org/repo)
1216
1217[www.github.com/org/repo](https://www.github.com/org/repo)"#;
1218
1219 let ctx = create_context(content);
1220 let result = rule.check(&ctx).unwrap();
1221
1222 assert!(
1223 result.is_empty(),
1224 "URL-like link text should not be flagged, got: {result:?}"
1225 );
1226 }
1227
1228 #[test]
1229 fn test_url_link_text_with_leading_space_not_flagged() {
1230 let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1231
1232 let content = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
1234
1235 let ctx = create_context(content);
1236 let result = rule.check(&ctx).unwrap();
1237
1238 assert!(
1239 result.is_empty(),
1240 "URL-like link text with leading space should not be flagged, got: {result:?}"
1241 );
1242 }
1243
1244 #[test]
1245 fn test_url_link_text_uppercase_scheme_not_flagged() {
1246 let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1247
1248 let content = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
1249
1250 let ctx = create_context(content);
1251 let result = rule.check(&ctx).unwrap();
1252
1253 assert!(
1254 result.is_empty(),
1255 "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
1256 );
1257 }
1258
1259 #[test]
1260 fn test_non_url_link_text_still_flagged() {
1261 let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1262
1263 let content = r#"[github.com/org/repo](https://github.com/org/repo)
1265
1266[Visit github](https://github.com/org/repo)
1267
1268[//github.com/org/repo](//github.com/org/repo)
1269
1270[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
1271
1272 let ctx = create_context(content);
1273 let result = rule.check(&ctx).unwrap();
1274
1275 assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");
1276 assert!(result.iter().any(|w| w.line == 1)); assert!(result.iter().any(|w| w.line == 3)); assert!(result.iter().any(|w| w.line == 5)); assert!(result.iter().any(|w| w.line == 7)); }
1281
1282 #[test]
1283 fn test_url_link_text_fix_not_applied() {
1284 let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1285
1286 let content = "[https://github.com/org/repo](https://github.com/org/repo)\n";
1287
1288 let ctx = create_context(content);
1289 let result = rule.fix(&ctx).unwrap();
1290
1291 assert_eq!(result, content, "Fix should not modify URL-like link text");
1292 }
1293
1294 #[test]
1295 fn test_mixed_url_and_regular_link_text() {
1296 let rule = MD044ProperNames::new(vec!["GitHub".to_string()], true);
1297
1298 let content = r#"[https://github.com/org/repo](https://github.com/org/repo)
1300
1301Visit [github documentation](https://github.com/docs) for details.
1302
1303[www.github.com/pricing](https://www.github.com/pricing)"#;
1304
1305 let ctx = create_context(content);
1306 let result = rule.check(&ctx).unwrap();
1307
1308 assert_eq!(
1310 result.len(),
1311 1,
1312 "Only non-URL link text should be flagged, got: {result:?}"
1313 );
1314 assert_eq!(result[0].line, 3);
1315 }
1316}