1use crate::utils::fast_hash;
2use crate::utils::regex_cache::{escape_regex, get_cached_fancy_regex};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use std::collections::{HashMap, HashSet};
6use std::sync::{Arc, Mutex};
7
8mod md044_config;
9pub use md044_config::MD044Config;
10
/// A recorded violation as `(line, column, found_text)`.
///
/// `line` is 1-based, `column` is the 1-based byte offset of the match within
/// its line, and `found_text` is the text exactly as it appeared in the document.
type WarningPosition = (usize, usize, String);

/// MD044: checks that configured proper names appear with their exact capitalization.
///
/// Cloning shares the same `content_cache`, because the cache sits behind an `Arc`.
#[derive(Clone)]
pub struct MD044ProperNames {
    // Rule configuration: the proper names plus the code-block / HTML toggles.
    config: MD044Config,
    // Source of the case-insensitive alternation regex; `None` when no names are configured.
    combined_pattern: Option<String>,
    // Lowercase spellings of every name (incl. dot-less and ASCII-folded forms),
    // used for cheap substring pre-filtering before the regex runs.
    name_variants: Vec<String>,
    // Violations computed so far, keyed by `fast_hash` of the document content.
    content_cache: Arc<Mutex<HashMap<u64, Vec<WarningPosition>>>>,
}
77
78impl MD044ProperNames {
79 pub fn new(names: Vec<String>, code_blocks: bool) -> Self {
80 let config = MD044Config {
81 names,
82 code_blocks,
83 html_elements: true, html_comments: true, };
86 let combined_pattern = Self::create_combined_pattern(&config);
87 let name_variants = Self::build_name_variants(&config);
88 Self {
89 config,
90 combined_pattern,
91 name_variants,
92 content_cache: Arc::new(Mutex::new(HashMap::new())),
93 }
94 }
95
96 fn ascii_normalize(s: &str) -> String {
98 s.replace(['é', 'è', 'ê', 'ë'], "e")
99 .replace(['à', 'á', 'â', 'ä', 'ã', 'å'], "a")
100 .replace(['ï', 'î', 'í', 'ì'], "i")
101 .replace(['ü', 'ú', 'ù', 'û'], "u")
102 .replace(['ö', 'ó', 'ò', 'ô', 'õ'], "o")
103 .replace('ñ', "n")
104 .replace('ç', "c")
105 }
106
107 pub fn from_config_struct(config: MD044Config) -> Self {
108 let combined_pattern = Self::create_combined_pattern(&config);
109 let name_variants = Self::build_name_variants(&config);
110 Self {
111 config,
112 combined_pattern,
113 name_variants,
114 content_cache: Arc::new(Mutex::new(HashMap::new())),
115 }
116 }
117
118 fn create_combined_pattern(config: &MD044Config) -> Option<String> {
120 if config.names.is_empty() {
121 return None;
122 }
123
124 let mut patterns: Vec<String> = config
126 .names
127 .iter()
128 .flat_map(|name| {
129 let mut variations = vec![];
130 let lower_name = name.to_lowercase();
131
132 variations.push(escape_regex(&lower_name));
134
135 let lower_name_no_dots = lower_name.replace('.', "");
137 if lower_name != lower_name_no_dots {
138 variations.push(escape_regex(&lower_name_no_dots));
139 }
140
141 let ascii_normalized = Self::ascii_normalize(&lower_name);
143
144 if ascii_normalized != lower_name {
145 variations.push(escape_regex(&ascii_normalized));
146
147 let ascii_no_dots = ascii_normalized.replace('.', "");
149 if ascii_normalized != ascii_no_dots {
150 variations.push(escape_regex(&ascii_no_dots));
151 }
152 }
153
154 variations
155 })
156 .collect();
157
158 patterns.sort_by_key(|b| std::cmp::Reverse(b.len()));
160
161 Some(format!(r"(?i)({})", patterns.join("|")))
164 }
165
166 fn build_name_variants(config: &MD044Config) -> Vec<String> {
167 let mut variants = HashSet::new();
168 for name in &config.names {
169 let lower_name = name.to_lowercase();
170 variants.insert(lower_name.clone());
171
172 let lower_no_dots = lower_name.replace('.', "");
173 if lower_name != lower_no_dots {
174 variants.insert(lower_no_dots);
175 }
176
177 let ascii_normalized = Self::ascii_normalize(&lower_name);
178 if ascii_normalized != lower_name {
179 variants.insert(ascii_normalized.clone());
180
181 let ascii_no_dots = ascii_normalized.replace('.', "");
182 if ascii_normalized != ascii_no_dots {
183 variants.insert(ascii_no_dots);
184 }
185 }
186 }
187
188 variants.into_iter().collect()
189 }
190
191 fn find_name_violations(
194 &self,
195 content: &str,
196 ctx: &crate::lint_context::LintContext,
197 content_lower: &str,
198 ) -> Vec<WarningPosition> {
199 if self.config.names.is_empty() || content.is_empty() || self.combined_pattern.is_none() {
201 return Vec::new();
202 }
203
204 let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
206
207 if !has_potential_matches {
208 return Vec::new();
209 }
210
211 let hash = fast_hash(content);
213 {
214 if let Ok(cache) = self.content_cache.lock()
216 && let Some(cached) = cache.get(&hash)
217 {
218 return cached.clone();
219 }
220 }
221
222 let mut violations = Vec::new();
223
224 let combined_regex = match &self.combined_pattern {
226 Some(pattern) => match get_cached_fancy_regex(pattern) {
227 Ok(regex) => regex,
228 Err(_) => return Vec::new(),
229 },
230 None => return Vec::new(),
231 };
232
233 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
235 let line_num = line_idx + 1;
236 let line = line_info.content(ctx.content);
237
238 let trimmed = line.trim_start();
240 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
241 continue;
242 }
243
244 if !self.config.code_blocks && line_info.in_code_block {
246 continue;
247 }
248
249 if !self.config.html_elements && line_info.in_html_block {
251 continue;
252 }
253
254 if !self.config.html_comments && line_info.in_html_comment {
256 continue;
257 }
258
259 if line_info.in_jsx_expression || line_info.in_mdx_comment {
261 continue;
262 }
263
264 if line_info.in_obsidian_comment {
266 continue;
267 }
268
269 let line_lower = line.to_lowercase();
271 let has_line_matches = self.name_variants.iter().any(|name| line_lower.contains(name));
272
273 if !has_line_matches {
274 continue;
275 }
276
277 for cap_result in combined_regex.find_iter(line) {
279 match cap_result {
280 Ok(cap) => {
281 let found_name = &line[cap.start()..cap.end()];
282
283 let start_pos = cap.start();
285 let end_pos = cap.end();
286
287 if !Self::is_at_word_boundary(line, start_pos, true)
288 || !Self::is_at_word_boundary(line, end_pos, false)
289 {
290 continue; }
292
293 if !self.config.code_blocks {
295 let byte_pos = line_info.byte_offset + cap.start();
296 if ctx.is_in_code_block_or_span(byte_pos) {
297 continue;
298 }
299 }
300
301 let byte_pos = line_info.byte_offset + cap.start();
303 if Self::is_in_link(ctx, byte_pos) {
304 continue;
305 }
306
307 if let Some(proper_name) = self.get_proper_name_for(found_name) {
309 if found_name != proper_name {
311 violations.push((line_num, cap.start() + 1, found_name.to_string()));
312 }
313 }
314 }
315 Err(e) => {
316 eprintln!("Regex execution error on line {line_num}: {e}");
317 }
318 }
319 }
320 }
321
322 if let Ok(mut cache) = self.content_cache.lock() {
324 cache.insert(hash, violations.clone());
325 }
326 violations
327 }
328
329 fn is_in_link(ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
336 use pulldown_cmark::LinkType;
337
338 let link_idx = ctx.links.partition_point(|link| link.byte_offset <= byte_pos);
340 if link_idx > 0 {
341 let link = &ctx.links[link_idx - 1];
342 if byte_pos < link.byte_end {
343 let text_start = if matches!(link.link_type, LinkType::WikiLink { .. }) {
345 link.byte_offset + 2
346 } else {
347 link.byte_offset + 1
348 };
349 let text_end = text_start + link.text.len();
350
351 if byte_pos >= text_start && byte_pos < text_end {
353 return Self::link_text_is_url(&link.text);
354 }
355 return true;
357 }
358 }
359
360 let image_idx = ctx.images.partition_point(|img| img.byte_offset <= byte_pos);
362 if image_idx > 0 {
363 let image = &ctx.images[image_idx - 1];
364 if byte_pos < image.byte_end {
365 let alt_start = image.byte_offset + 2;
367 let alt_end = alt_start + image.alt_text.len();
368
369 if byte_pos >= alt_start && byte_pos < alt_end {
371 return false;
372 }
373 return true;
375 }
376 }
377
378 ctx.is_in_reference_def(byte_pos)
380 }
381
382 fn link_text_is_url(text: &str) -> bool {
385 let lower = text.trim().to_ascii_lowercase();
386 lower.starts_with("http://") || lower.starts_with("https://") || lower.starts_with("www.")
387 }
388
389 fn is_word_boundary_char(c: char) -> bool {
391 !c.is_alphanumeric()
392 }
393
394 fn is_word_boundary_char_js(c: char) -> bool {
398 !c.is_alphanumeric() && c != '_'
399 }
400
401 fn is_in_html_tag(line: &str, pos: usize) -> bool {
403 let before = &line[..pos.min(line.len())];
404 match (before.rfind('<'), before.rfind('>')) {
405 (Some(open), Some(close)) => open > close,
406 (Some(_), None) => true,
407 _ => false,
408 }
409 }
410
411 fn is_at_word_boundary(content: &str, pos: usize, is_start: bool) -> bool {
413 let boundary_fn: fn(char) -> bool = if Self::is_in_html_tag(content, pos) {
416 Self::is_word_boundary_char_js
417 } else {
418 Self::is_word_boundary_char
419 };
420 if is_start {
421 if pos == 0 {
422 return true;
423 }
424 match content[..pos].chars().next_back() {
426 None => true,
427 Some(c) => boundary_fn(c),
428 }
429 } else {
430 if pos >= content.len() {
431 return true;
432 }
433 match content[pos..].chars().next() {
435 None => true,
436 Some(c) => boundary_fn(c),
437 }
438 }
439 }
440
441 fn get_proper_name_for(&self, found_name: &str) -> Option<String> {
443 let found_lower = found_name.to_lowercase();
444
445 for name in &self.config.names {
447 let lower_name = name.to_lowercase();
448 let lower_name_no_dots = lower_name.replace('.', "");
449
450 if found_lower == lower_name || found_lower == lower_name_no_dots {
452 return Some(name.clone());
453 }
454
455 let ascii_normalized = Self::ascii_normalize(&lower_name);
457
458 let ascii_no_dots = ascii_normalized.replace('.', "");
459
460 if found_lower == ascii_normalized || found_lower == ascii_no_dots {
461 return Some(name.clone());
462 }
463 }
464 None
465 }
466}
467
468impl Rule for MD044ProperNames {
    /// Returns the rule identifier, `"MD044"`.
    fn name(&self) -> &'static str {
        "MD044"
    }
472
    /// Returns the one-line, human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Proper names should have the correct capitalization"
    }
476
477 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
478 if self.config.names.is_empty() {
479 return true;
480 }
481 let content_lower = if ctx.content.is_ascii() {
483 ctx.content.to_ascii_lowercase()
484 } else {
485 ctx.content.to_lowercase()
486 };
487 !self.name_variants.iter().any(|name| content_lower.contains(name))
488 }
489
490 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
491 let content = ctx.content;
492 if content.is_empty() || self.config.names.is_empty() || self.combined_pattern.is_none() {
493 return Ok(Vec::new());
494 }
495
496 let content_lower = if content.is_ascii() {
498 content.to_ascii_lowercase()
499 } else {
500 content.to_lowercase()
501 };
502
503 let has_potential_matches = self.name_variants.iter().any(|name| content_lower.contains(name));
505
506 if !has_potential_matches {
507 return Ok(Vec::new());
508 }
509
510 let line_index = &ctx.line_index;
511 let violations = self.find_name_violations(content, ctx, &content_lower);
512
513 let warnings = violations
514 .into_iter()
515 .filter_map(|(line, column, found_name)| {
516 self.get_proper_name_for(&found_name).map(|proper_name| LintWarning {
517 rule_name: Some(self.name().to_string()),
518 line,
519 column,
520 end_line: line,
521 end_column: column + found_name.len(),
522 message: format!("Proper name '{found_name}' should be '{proper_name}'"),
523 severity: Severity::Warning,
524 fix: Some(Fix {
525 range: line_index.line_col_to_byte_range(line, column),
526 replacement: proper_name,
527 }),
528 })
529 })
530 .collect();
531
532 Ok(warnings)
533 }
534
535 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
536 let content = ctx.content;
537 if content.is_empty() || self.config.names.is_empty() {
538 return Ok(content.to_string());
539 }
540
541 let content_lower = if content.is_ascii() {
542 content.to_ascii_lowercase()
543 } else {
544 content.to_lowercase()
545 };
546 let violations = self.find_name_violations(content, ctx, &content_lower);
547 if violations.is_empty() {
548 return Ok(content.to_string());
549 }
550
551 let mut fixed_lines = Vec::new();
553
554 let mut violations_by_line: HashMap<usize, Vec<(usize, String)>> = HashMap::new();
556 for (line_num, col_num, found_name) in violations {
557 violations_by_line
558 .entry(line_num)
559 .or_default()
560 .push((col_num, found_name));
561 }
562
563 for violations in violations_by_line.values_mut() {
565 violations.sort_by_key(|b| std::cmp::Reverse(b.0));
566 }
567
568 for (line_idx, line_info) in ctx.lines.iter().enumerate() {
570 let line_num = line_idx + 1;
571
572 if let Some(line_violations) = violations_by_line.get(&line_num) {
573 let mut fixed_line = line_info.content(ctx.content).to_string();
575
576 for (col_num, found_name) in line_violations {
577 if let Some(proper_name) = self.get_proper_name_for(found_name) {
578 let start_col = col_num - 1; let end_col = start_col + found_name.len();
580
581 if end_col <= fixed_line.len()
582 && fixed_line.is_char_boundary(start_col)
583 && fixed_line.is_char_boundary(end_col)
584 {
585 fixed_line.replace_range(start_col..end_col, &proper_name);
586 }
587 }
588 }
589
590 fixed_lines.push(fixed_line);
591 } else {
592 fixed_lines.push(line_info.content(ctx.content).to_string());
594 }
595 }
596
597 let mut result = fixed_lines.join("\n");
599 if content.ends_with('\n') && !result.ends_with('\n') {
600 result.push('\n');
601 }
602 Ok(result)
603 }
604
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
608
609 fn default_config_section(&self) -> Option<(String, toml::Value)> {
610 let json_value = serde_json::to_value(&self.config).ok()?;
611 Some((
612 self.name().to_string(),
613 crate::rule_config_serde::json_to_toml_value(&json_value)?,
614 ))
615 }
616
617 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
618 where
619 Self: Sized,
620 {
621 let rule_config = crate::rule_config_serde::load_rule_config::<MD044Config>(config);
622 Box::new(Self::from_config_struct(rule_config))
623 }
624}
625
626#[cfg(test)]
627mod tests {
628 use super::*;
629 use crate::lint_context::LintContext;
630
631 fn create_context(content: &str) -> LintContext<'_> {
632 LintContext::new(content, crate::config::MarkdownFlavor::Standard, None)
633 }
634
635 #[test]
636 fn test_correctly_capitalized_names() {
637 let rule = MD044ProperNames::new(
638 vec![
639 "JavaScript".to_string(),
640 "TypeScript".to_string(),
641 "Node.js".to_string(),
642 ],
643 true,
644 );
645
646 let content = "This document uses JavaScript, TypeScript, and Node.js correctly.";
647 let ctx = create_context(content);
648 let result = rule.check(&ctx).unwrap();
649 assert!(result.is_empty(), "Should not flag correctly capitalized names");
650 }
651
652 #[test]
653 fn test_incorrectly_capitalized_names() {
654 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
655
656 let content = "This document uses javascript and typescript incorrectly.";
657 let ctx = create_context(content);
658 let result = rule.check(&ctx).unwrap();
659
660 assert_eq!(result.len(), 2, "Should flag two incorrect capitalizations");
661 assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
662 assert_eq!(result[0].line, 1);
663 assert_eq!(result[0].column, 20);
664 assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
665 assert_eq!(result[1].line, 1);
666 assert_eq!(result[1].column, 35);
667 }
668
669 #[test]
670 fn test_names_at_beginning_of_sentences() {
671 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "Python".to_string()], true);
672
673 let content = "javascript is a great language. python is also popular.";
674 let ctx = create_context(content);
675 let result = rule.check(&ctx).unwrap();
676
677 assert_eq!(result.len(), 2, "Should flag names at beginning of sentences");
678 assert_eq!(result[0].line, 1);
679 assert_eq!(result[0].column, 1);
680 assert_eq!(result[1].line, 1);
681 assert_eq!(result[1].column, 33);
682 }
683
684 #[test]
685 fn test_names_in_code_blocks_checked_by_default() {
686 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
687
688 let content = r#"Here is some text with JavaScript.
689
690```javascript
691// This javascript should be checked
692const lang = "javascript";
693```
694
695But this javascript should be flagged."#;
696
697 let ctx = create_context(content);
698 let result = rule.check(&ctx).unwrap();
699
700 assert_eq!(result.len(), 3, "Should flag javascript inside and outside code blocks");
701 assert_eq!(result[0].line, 4);
702 assert_eq!(result[1].line, 5);
703 assert_eq!(result[2].line, 8);
704 }
705
706 #[test]
707 fn test_names_in_code_blocks_ignored_when_disabled() {
708 let rule = MD044ProperNames::new(
709 vec!["JavaScript".to_string()],
710 false, );
712
713 let content = r#"```
714javascript in code block
715```"#;
716
717 let ctx = create_context(content);
718 let result = rule.check(&ctx).unwrap();
719
720 assert_eq!(
721 result.len(),
722 0,
723 "Should not flag javascript in code blocks when code_blocks is false"
724 );
725 }
726
727 #[test]
728 fn test_names_in_inline_code_checked_by_default() {
729 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
730
731 let content = "This is `javascript` in inline code and javascript outside.";
732 let ctx = create_context(content);
733 let result = rule.check(&ctx).unwrap();
734
735 assert_eq!(result.len(), 2, "Should flag javascript inside and outside inline code");
737 assert_eq!(result[0].column, 10); assert_eq!(result[1].column, 41); }
740
741 #[test]
742 fn test_multiple_names_in_same_line() {
743 let rule = MD044ProperNames::new(
744 vec!["JavaScript".to_string(), "TypeScript".to_string(), "React".to_string()],
745 true,
746 );
747
748 let content = "I use javascript, typescript, and react in my projects.";
749 let ctx = create_context(content);
750 let result = rule.check(&ctx).unwrap();
751
752 assert_eq!(result.len(), 3, "Should flag all three incorrect names");
753 assert_eq!(result[0].message, "Proper name 'javascript' should be 'JavaScript'");
754 assert_eq!(result[1].message, "Proper name 'typescript' should be 'TypeScript'");
755 assert_eq!(result[2].message, "Proper name 'react' should be 'React'");
756 }
757
758 #[test]
759 fn test_case_sensitivity() {
760 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
761
762 let content = "JAVASCRIPT, Javascript, javascript, and JavaScript variations.";
763 let ctx = create_context(content);
764 let result = rule.check(&ctx).unwrap();
765
766 assert_eq!(result.len(), 3, "Should flag all incorrect case variations");
767 assert!(result.iter().all(|w| w.message.contains("should be 'JavaScript'")));
769 }
770
771 #[test]
772 fn test_configuration_with_custom_name_list() {
773 let config = MD044Config {
774 names: vec!["GitHub".to_string(), "GitLab".to_string(), "DevOps".to_string()],
775 code_blocks: true,
776 html_elements: true,
777 html_comments: true,
778 };
779 let rule = MD044ProperNames::from_config_struct(config);
780
781 let content = "We use github, gitlab, and devops for our workflow.";
782 let ctx = create_context(content);
783 let result = rule.check(&ctx).unwrap();
784
785 assert_eq!(result.len(), 3, "Should flag all custom names");
786 assert_eq!(result[0].message, "Proper name 'github' should be 'GitHub'");
787 assert_eq!(result[1].message, "Proper name 'gitlab' should be 'GitLab'");
788 assert_eq!(result[2].message, "Proper name 'devops' should be 'DevOps'");
789 }
790
791 #[test]
792 fn test_empty_configuration() {
793 let rule = MD044ProperNames::new(vec![], true);
794
795 let content = "This has javascript and typescript but no configured names.";
796 let ctx = create_context(content);
797 let result = rule.check(&ctx).unwrap();
798
799 assert!(result.is_empty(), "Should not flag anything with empty configuration");
800 }
801
802 #[test]
803 fn test_names_with_special_characters() {
804 let rule = MD044ProperNames::new(
805 vec!["Node.js".to_string(), "ASP.NET".to_string(), "C++".to_string()],
806 true,
807 );
808
809 let content = "We use nodejs, asp.net, ASP.NET, and c++ in our stack.";
810 let ctx = create_context(content);
811 let result = rule.check(&ctx).unwrap();
812
813 assert_eq!(result.len(), 3, "Should handle special characters correctly");
818
819 let messages: Vec<&str> = result.iter().map(|w| w.message.as_str()).collect();
820 assert!(messages.contains(&"Proper name 'nodejs' should be 'Node.js'"));
821 assert!(messages.contains(&"Proper name 'asp.net' should be 'ASP.NET'"));
822 assert!(messages.contains(&"Proper name 'c++' should be 'C++'"));
823 }
824
825 #[test]
826 fn test_word_boundaries() {
827 let rule = MD044ProperNames::new(vec!["Java".to_string(), "Script".to_string()], true);
828
829 let content = "JavaScript is not java or script, but Java and Script are separate.";
830 let ctx = create_context(content);
831 let result = rule.check(&ctx).unwrap();
832
833 assert_eq!(result.len(), 2, "Should respect word boundaries");
835 assert!(result.iter().any(|w| w.column == 19)); assert!(result.iter().any(|w| w.column == 27)); }
838
839 #[test]
840 fn test_fix_method() {
841 let rule = MD044ProperNames::new(
842 vec![
843 "JavaScript".to_string(),
844 "TypeScript".to_string(),
845 "Node.js".to_string(),
846 ],
847 true,
848 );
849
850 let content = "I love javascript, typescript, and nodejs!";
851 let ctx = create_context(content);
852 let fixed = rule.fix(&ctx).unwrap();
853
854 assert_eq!(fixed, "I love JavaScript, TypeScript, and Node.js!");
855 }
856
857 #[test]
858 fn test_fix_multiple_occurrences() {
859 let rule = MD044ProperNames::new(vec!["Python".to_string()], true);
860
861 let content = "python is great. I use python daily. PYTHON is powerful.";
862 let ctx = create_context(content);
863 let fixed = rule.fix(&ctx).unwrap();
864
865 assert_eq!(fixed, "Python is great. I use Python daily. Python is powerful.");
866 }
867
868 #[test]
869 fn test_fix_checks_code_blocks_by_default() {
870 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
871
872 let content = r#"I love javascript.
873
874```
875const lang = "javascript";
876```
877
878More javascript here."#;
879
880 let ctx = create_context(content);
881 let fixed = rule.fix(&ctx).unwrap();
882
883 let expected = r#"I love JavaScript.
884
885```
886const lang = "JavaScript";
887```
888
889More JavaScript here."#;
890
891 assert_eq!(fixed, expected);
892 }
893
894 #[test]
895 fn test_multiline_content() {
896 let rule = MD044ProperNames::new(vec!["Rust".to_string(), "Python".to_string()], true);
897
898 let content = r#"First line with rust.
899Second line with python.
900Third line with RUST and PYTHON."#;
901
902 let ctx = create_context(content);
903 let result = rule.check(&ctx).unwrap();
904
905 assert_eq!(result.len(), 4, "Should flag all incorrect occurrences");
906 assert_eq!(result[0].line, 1);
907 assert_eq!(result[1].line, 2);
908 assert_eq!(result[2].line, 3);
909 assert_eq!(result[3].line, 3);
910 }
911
912 #[test]
913 fn test_default_config() {
914 let config = MD044Config::default();
915 assert!(config.names.is_empty());
916 assert!(!config.code_blocks); }
918
919 #[test]
920 fn test_performance_with_many_names() {
921 let mut names = vec![];
922 for i in 0..50 {
923 names.push(format!("ProperName{i}"));
924 }
925
926 let rule = MD044ProperNames::new(names, true);
927
928 let content = "This has propername0, propername25, and propername49 incorrectly.";
929 let ctx = create_context(content);
930 let result = rule.check(&ctx).unwrap();
931
932 assert_eq!(result.len(), 3, "Should handle many configured names efficiently");
933 }
934
935 #[test]
936 fn test_large_name_count_performance() {
937 let names = (0..1000).map(|i| format!("ProperName{i}")).collect::<Vec<_>>();
940
941 let rule = MD044ProperNames::new(names, true);
942
943 assert!(rule.combined_pattern.is_some());
945
946 let content = "This has propername0 and propername999 in it.";
948 let ctx = create_context(content);
949 let result = rule.check(&ctx).unwrap();
950
951 assert_eq!(result.len(), 2, "Should handle 1000 names without issues");
953 }
954
955 #[test]
956 fn test_cache_behavior() {
957 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
958
959 let content = "Using javascript here.";
960 let ctx = create_context(content);
961
962 let result1 = rule.check(&ctx).unwrap();
964 assert_eq!(result1.len(), 1);
965
966 let result2 = rule.check(&ctx).unwrap();
968 assert_eq!(result2.len(), 1);
969
970 assert_eq!(result1[0].line, result2[0].line);
972 assert_eq!(result1[0].column, result2[0].column);
973 }
974
975 #[test]
976 fn test_html_comments_not_checked_when_disabled() {
977 let config = MD044Config {
978 names: vec!["JavaScript".to_string()],
979 code_blocks: true, html_elements: true, html_comments: false, };
983 let rule = MD044ProperNames::from_config_struct(config);
984
985 let content = r#"Regular javascript here.
986<!-- This javascript in HTML comment should be ignored -->
987More javascript outside."#;
988
989 let ctx = create_context(content);
990 let result = rule.check(&ctx).unwrap();
991
992 assert_eq!(result.len(), 2, "Should only flag javascript outside HTML comments");
993 assert_eq!(result[0].line, 1);
994 assert_eq!(result[1].line, 3);
995 }
996
997 #[test]
998 fn test_html_comments_checked_when_enabled() {
999 let config = MD044Config {
1000 names: vec!["JavaScript".to_string()],
1001 code_blocks: true, html_elements: true, html_comments: true, };
1005 let rule = MD044ProperNames::from_config_struct(config);
1006
1007 let content = r#"Regular javascript here.
1008<!-- This javascript in HTML comment should be checked -->
1009More javascript outside."#;
1010
1011 let ctx = create_context(content);
1012 let result = rule.check(&ctx).unwrap();
1013
1014 assert_eq!(
1015 result.len(),
1016 3,
1017 "Should flag all javascript occurrences including in HTML comments"
1018 );
1019 }
1020
1021 #[test]
1022 fn test_multiline_html_comments() {
1023 let config = MD044Config {
1024 names: vec!["Python".to_string(), "JavaScript".to_string()],
1025 code_blocks: true, html_elements: true, html_comments: false, };
1029 let rule = MD044ProperNames::from_config_struct(config);
1030
1031 let content = r#"Regular python here.
1032<!--
1033This is a multiline comment
1034with javascript and python
1035that should be ignored
1036-->
1037More javascript outside."#;
1038
1039 let ctx = create_context(content);
1040 let result = rule.check(&ctx).unwrap();
1041
1042 assert_eq!(result.len(), 2, "Should only flag names outside HTML comments");
1043 assert_eq!(result[0].line, 1); assert_eq!(result[1].line, 7); }
1046
1047 #[test]
1048 fn test_fix_preserves_html_comments_when_disabled() {
1049 let config = MD044Config {
1050 names: vec!["JavaScript".to_string()],
1051 code_blocks: true, html_elements: true, html_comments: false, };
1055 let rule = MD044ProperNames::from_config_struct(config);
1056
1057 let content = r#"javascript here.
1058<!-- javascript in comment -->
1059More javascript."#;
1060
1061 let ctx = create_context(content);
1062 let fixed = rule.fix(&ctx).unwrap();
1063
1064 let expected = r#"JavaScript here.
1065<!-- javascript in comment -->
1066More JavaScript."#;
1067
1068 assert_eq!(
1069 fixed, expected,
1070 "Should not fix names inside HTML comments when disabled"
1071 );
1072 }
1073
1074 #[test]
1075 fn test_proper_names_in_link_text_are_flagged() {
1076 let rule = MD044ProperNames::new(
1077 vec!["JavaScript".to_string(), "Node.js".to_string(), "Python".to_string()],
1078 true,
1079 );
1080
1081 let content = r#"Check this [javascript documentation](https://javascript.info) for info.
1082
1083Visit [node.js homepage](https://nodejs.org) and [python tutorial](https://python.org).
1084
1085Real javascript should be flagged.
1086
1087Also see the [typescript guide][ts-ref] for more.
1088
1089Real python should be flagged too.
1090
1091[ts-ref]: https://typescript.org/handbook"#;
1092
1093 let ctx = create_context(content);
1094 let result = rule.check(&ctx).unwrap();
1095
1096 assert_eq!(result.len(), 5, "Expected 5 warnings: 3 in link text + 2 standalone");
1103
1104 let line_1_warnings: Vec<_> = result.iter().filter(|w| w.line == 1).collect();
1106 assert_eq!(line_1_warnings.len(), 1);
1107 assert!(
1108 line_1_warnings[0]
1109 .message
1110 .contains("'javascript' should be 'JavaScript'")
1111 );
1112
1113 let line_3_warnings: Vec<_> = result.iter().filter(|w| w.line == 3).collect();
1114 assert_eq!(line_3_warnings.len(), 2); assert!(result.iter().any(|w| w.line == 5 && w.message.contains("'javascript'")));
1118 assert!(result.iter().any(|w| w.line == 9 && w.message.contains("'python'")));
1119 }
1120
1121 #[test]
1122 fn test_link_urls_not_flagged() {
1123 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1124
1125 let content = r#"[Link Text](https://javascript.info/guide)"#;
1127
1128 let ctx = create_context(content);
1129 let result = rule.check(&ctx).unwrap();
1130
1131 assert!(result.is_empty(), "URLs should not be checked for proper names");
1133 }
1134
1135 #[test]
1136 fn test_proper_names_in_image_alt_text_are_flagged() {
1137 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1138
1139 let content = r#"Here is a  image.
1140
1141Real javascript should be flagged."#;
1142
1143 let ctx = create_context(content);
1144 let result = rule.check(&ctx).unwrap();
1145
1146 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in alt text + 1 standalone");
1150 assert!(result[0].message.contains("'javascript' should be 'JavaScript'"));
1151 assert!(result[0].line == 1); assert!(result[1].message.contains("'javascript' should be 'JavaScript'"));
1153 assert!(result[1].line == 3); }
1155
1156 #[test]
1157 fn test_image_urls_not_flagged() {
1158 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1159
1160 let content = r#""#;
1162
1163 let ctx = create_context(content);
1164 let result = rule.check(&ctx).unwrap();
1165
1166 assert!(result.is_empty(), "Image URLs should not be checked for proper names");
1168 }
1169
1170 #[test]
1171 fn test_reference_link_text_flagged_but_definition_not() {
1172 let rule = MD044ProperNames::new(vec!["JavaScript".to_string(), "TypeScript".to_string()], true);
1173
1174 let content = r#"Check the [javascript guide][js-ref] for details.
1175
1176Real javascript should be flagged.
1177
1178[js-ref]: https://javascript.info/typescript/guide"#;
1179
1180 let ctx = create_context(content);
1181 let result = rule.check(&ctx).unwrap();
1182
1183 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in link text + 1 standalone");
1188 assert!(result.iter().any(|w| w.line == 1 && w.message.contains("'javascript'")));
1189 assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1190 }
1191
1192 #[test]
1193 fn test_reference_definitions_not_flagged() {
1194 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1195
1196 let content = r#"[js-ref]: https://javascript.info/guide"#;
1198
1199 let ctx = create_context(content);
1200 let result = rule.check(&ctx).unwrap();
1201
1202 assert!(result.is_empty(), "Reference definitions should not be checked");
1204 }
1205
1206 #[test]
1207 fn test_wikilinks_text_is_flagged() {
1208 let rule = MD044ProperNames::new(vec!["JavaScript".to_string()], true);
1209
1210 let content = r#"[[javascript]]
1212
1213Regular javascript here.
1214
1215[[JavaScript|display text]]"#;
1216
1217 let ctx = create_context(content);
1218 let result = rule.check(&ctx).unwrap();
1219
1220 assert_eq!(result.len(), 2, "Expected 2 warnings: 1 in WikiLink + 1 standalone");
1224 assert!(
1225 result
1226 .iter()
1227 .any(|w| w.line == 1 && w.column == 3 && w.message.contains("'javascript'"))
1228 );
1229 assert!(result.iter().any(|w| w.line == 3 && w.message.contains("'javascript'")));
1230 }
1231
/// Link text that is itself a URL (`https://`, `http://` or `www.` prefixed)
/// must not produce any warning for a name embedded in it.
#[test]
fn test_url_link_text_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Three inline links whose visible text is a URL containing "github".
    let markdown = r#"[https://github.com/org/repo](https://github.com/org/repo)

[http://github.com/org/repo](http://github.com/org/repo)

[www.github.com/org/repo](https://www.github.com/org/repo)"#;
    let context = create_context(markdown);

    let result = linter.check(&context).unwrap();
    assert!(
        result.is_empty(),
        "URL-like link text should not be flagged, got: {result:?}"
    );
}
1251
/// A space between the opening bracket and the URL must not stop the link
/// text from being treated as URL-like; no warning is expected.
#[test]
fn test_url_link_text_with_leading_space_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Note the single space directly after `[`.
    let markdown = r#"[ https://github.com/org/repo](https://github.com/org/repo)"#;
    let context = create_context(markdown);

    let result = linter.check(&context).unwrap();
    assert!(
        result.is_empty(),
        "URL-like link text with leading space should not be flagged, got: {result:?}"
    );
}
1267
/// Link text whose scheme and host are written in uppercase (`HTTPS://GITHUB.COM`)
/// is still expected to count as URL-like and produce no warning.
#[test]
fn test_url_link_text_uppercase_scheme_not_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    let markdown = r#"[HTTPS://GITHUB.COM/org/repo](https://github.com/org/repo)"#;
    let context = create_context(markdown);

    let result = linter.check(&context).unwrap();
    assert!(
        result.is_empty(),
        "URL-like link text with uppercase scheme should not be flagged, got: {result:?}"
    );
}
1282
/// Link text that is not recognised as a URL is still linted: a bare domain,
/// ordinary prose, a protocol-relative `//` form and an `ftp://` form are each
/// expected to yield a warning on their own line.
#[test]
fn test_non_url_link_text_still_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // One link per odd-numbered line; even-numbered lines are blank.
    let markdown = r#"[github.com/org/repo](https://github.com/org/repo)

[Visit github](https://github.com/org/repo)

[//github.com/org/repo](//github.com/org/repo)

[ftp://github.com/org/repo](ftp://github.com/org/repo)"#;
    let context = create_context(markdown);

    let result = linter.check(&context).unwrap();
    assert_eq!(result.len(), 4, "Non-URL link text should be flagged, got: {result:?}");

    // Every link line must contribute at least one warning.
    for expected_line in [1, 3, 5, 7] {
        assert!(result.iter().any(|warning| warning.line == expected_line));
    }
}
1305
/// Running the fixer over a link whose text is a URL must return the input
/// unchanged.
#[test]
fn test_url_link_text_fix_not_applied() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    let original = "[https://github.com/org/repo](https://github.com/org/repo)\n";
    let context = create_context(original);

    let fixed = linter.fix(&context).unwrap();
    assert_eq!(fixed, original, "Fix should not modify URL-like link text");
}
1317
/// In a document mixing URL-like and ordinary link text, only the ordinary
/// link text (line 3) is expected to be reported.
#[test]
fn test_mixed_url_and_regular_link_text() {
    let linter = MD044ProperNames::new(vec![String::from("GitHub")], true);

    // Lines 1 and 5 use a URL as link text; line 3 uses prose.
    let markdown = r#"[https://github.com/org/repo](https://github.com/org/repo)

Visit [github documentation](https://github.com/docs) for details.

[www.github.com/pricing](https://www.github.com/pricing)"#;
    let context = create_context(markdown);

    let result = linter.check(&context).unwrap();
    assert_eq!(
        result.len(),
        1,
        "Only non-URL link text should be flagged, got: {result:?}"
    );

    let flagged_line = result[0].line;
    assert_eq!(flagged_line, 3);
}
1340
/// Inside an HTML attribute value, `test` directly followed by `_`
/// (`test_image`) must not be reported, while `test` in `example.test/` on the
/// same line is reported at column 23. The plain `test` on line 3 is reported
/// as usual.
#[test]
fn test_html_attribute_underscore_no_false_positive() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "# Heading\n\ntest\n\n<img src=\"www.example.test/test_image.png\">\n";
    let context = crate::lint_context::LintContext::new(
        markdown,
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let result = linter.check(&context).unwrap();

    // Line 5 holds the <img> tag.
    let line5_violations: Vec<_> = result.iter().filter(|warning| warning.line == 5).collect();
    assert_eq!(
        line5_violations.len(),
        1,
        "Should flag only 'test' in 'example.test/' not in 'test_image': {line5_violations:?}"
    );
    assert_eq!(line5_violations[0].column, 23, "Should flag col 23 (example.test)");

    // Line 3 holds the bare word.
    let line3_hits = result.iter().filter(|warning| warning.line == 3).count();
    assert_eq!(line3_hits, 1, "Plain 'test' on line 3 should be flagged");
}
1366
/// `test` inside the attribute name `data-test_id` must not be reported, and
/// the already correctly-cased `Test` in the element text yields nothing, so
/// the document as a whole produces no warnings.
#[test]
fn test_html_attribute_underscore_prefix_no_false_positive() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "# Heading\n\n<span data-test_id=\"value\">Test content</span>\n";
    let context = crate::lint_context::LintContext::new(
        markdown,
        crate::config::MarkdownFlavor::Standard,
        None,
    );

    let result = linter.check(&context).unwrap();
    assert!(
        result.is_empty(),
        "Should not flag 'test' in 'data-test_id' inside HTML or 'Test' that is already correct: {result:?}"
    );
}
1383
/// A name inside an `href` URL (`example.test`) is still reported, and so is
/// the lowercase name in the anchor text following the tag: two warnings, at
/// columns 26 and 37.
#[test]
fn test_html_attribute_name_in_url_still_flagged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "# Heading\n\n<a href=\"https://example.test/page\">test link</a>\n";
    let context = crate::lint_context::LintContext::new(
        markdown,
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let result = linter.check(&context).unwrap();

    assert_eq!(
        result.len(),
        2,
        "Should flag 'test' in href URL and in anchor text: {result:?}"
    );

    // Compare on columns only; both warnings come from the same line.
    let cols: Vec<usize> = result.iter().map(|warning| warning.column).collect();
    assert!(
        cols.contains(&26),
        "Should flag col 26 (example.test in href): {cols:?}"
    );
    assert!(
        cols.contains(&37),
        "Should flag col 37 (test link in anchor text): {cols:?}"
    );
}
1409
/// In plain text (no HTML), an underscore next to the name does not suppress
/// the warning: `test` is reported both in `test_image` (column 1) and in
/// `just_test` (column 29).
#[test]
fn test_plain_text_underscore_boundary_unchanged() {
    let linter = MD044ProperNames::new(vec![String::from("Test")], true);

    let markdown = "# Heading\n\ntest_image is here and just_test ends here\n";
    let context = crate::lint_context::LintContext::new(
        markdown,
        crate::config::MarkdownFlavor::Standard,
        None,
    );
    let result = linter.check(&context).unwrap();

    assert_eq!(
        result.len(),
        2,
        "Should flag 'test' in both 'test_image' and 'just_test': {result:?}"
    );

    let cols: Vec<usize> = result.iter().map(|warning| warning.column).collect();
    assert!(cols.contains(&1), "Should flag col 1 (test_image): {cols:?}");
    assert!(cols.contains(&29), "Should flag col 29 (just_test): {cols:?}");
}
1430}