1use crate::error::Result;
7use crate::rule::{Rule, RuleCategory, RuleMetadata};
8use crate::{
9 Document,
10 violation::{Severity, Violation},
11};
12use std::collections::HashMap;
13
/// MD044: proper names should have the correct capitalization.
///
/// Holds the table of names the rule knows about. Keys are the lowercase
/// spelling used for case-insensitive lookup; values are the canonical
/// capitalization reported to the user.
#[derive(Debug, Clone)]
pub struct MD044 {
    /// Lowercase spelling -> canonical capitalization.
    proper_names: HashMap<String, String>,
}
19
20impl MD044 {
21 pub fn new() -> Self {
23 let mut proper_names = HashMap::new();
24
25 proper_names.insert("javascript".to_string(), "JavaScript".to_string());
27 proper_names.insert("typescript".to_string(), "TypeScript".to_string());
28 proper_names.insert("github".to_string(), "GitHub".to_string());
29 proper_names.insert("gitlab".to_string(), "GitLab".to_string());
30 proper_names.insert("bitbucket".to_string(), "Bitbucket".to_string());
31 proper_names.insert("nodejs".to_string(), "Node.js".to_string());
32 proper_names.insert("mysql".to_string(), "MySQL".to_string());
33 proper_names.insert("postgresql".to_string(), "PostgreSQL".to_string());
34 proper_names.insert("mongodb".to_string(), "MongoDB".to_string());
35 proper_names.insert("redis".to_string(), "Redis".to_string());
36 proper_names.insert("docker".to_string(), "Docker".to_string());
37 proper_names.insert("kubernetes".to_string(), "Kubernetes".to_string());
38 proper_names.insert("aws".to_string(), "AWS".to_string());
39 proper_names.insert("azure".to_string(), "Azure".to_string());
40 proper_names.insert("google cloud".to_string(), "Google Cloud".to_string());
41 proper_names.insert("gcp".to_string(), "GCP".to_string());
42 proper_names.insert("react".to_string(), "React".to_string());
43 proper_names.insert("vue".to_string(), "Vue".to_string());
44 proper_names.insert("angular".to_string(), "Angular".to_string());
45 proper_names.insert("webpack".to_string(), "webpack".to_string());
46 proper_names.insert("eslint".to_string(), "ESLint".to_string());
47 proper_names.insert("prettier".to_string(), "Prettier".to_string());
48 proper_names.insert("babel".to_string(), "Babel".to_string());
49 proper_names.insert("json".to_string(), "JSON".to_string());
50 proper_names.insert("xml".to_string(), "XML".to_string());
51 proper_names.insert("html".to_string(), "HTML".to_string());
52 proper_names.insert("css".to_string(), "CSS".to_string());
53 proper_names.insert("sass".to_string(), "Sass".to_string());
54 proper_names.insert("scss".to_string(), "SCSS".to_string());
55 proper_names.insert("less".to_string(), "Less".to_string());
56 proper_names.insert("api".to_string(), "API".to_string());
57 proper_names.insert("rest".to_string(), "REST".to_string());
58 proper_names.insert("graphql".to_string(), "GraphQL".to_string());
59 proper_names.insert("oauth".to_string(), "OAuth".to_string());
60 proper_names.insert("jwt".to_string(), "JWT".to_string());
61 proper_names.insert("http".to_string(), "HTTP".to_string());
62 proper_names.insert("https".to_string(), "HTTPS".to_string());
63 proper_names.insert("tcp".to_string(), "TCP".to_string());
64 proper_names.insert("udp".to_string(), "UDP".to_string());
65 proper_names.insert("ip".to_string(), "IP".to_string());
66 proper_names.insert("dns".to_string(), "DNS".to_string());
67 proper_names.insert("url".to_string(), "URL".to_string());
68 proper_names.insert("uri".to_string(), "URI".to_string());
69 proper_names.insert("uuid".to_string(), "UUID".to_string());
70
71 Self { proper_names }
72 }
73
74 #[allow(dead_code)]
76 pub fn with_names(proper_names: HashMap<String, String>) -> Self {
77 Self { proper_names }
78 }
79
80 #[allow(dead_code)]
82 pub fn add_name(&mut self, incorrect: String, correct: String) {
83 self.proper_names.insert(incorrect.to_lowercase(), correct);
84 }
85
86 fn check_line_names(&self, line: &str, line_number: usize) -> Vec<Violation> {
88 let mut violations = Vec::new();
89
90 if line.trim().is_empty() {
92 return violations;
93 }
94
95 let mut matches = Vec::new();
97
98 for (incorrect_lower, correct) in &self.proper_names {
99 let line_lower = line.to_lowercase();
102 let mut search_start = 0;
103
104 while let Some(byte_pos) = line_lower[search_start..].find(incorrect_lower) {
105 let absolute_byte_pos = search_start + byte_pos;
106
107 let char_pos = line[..absolute_byte_pos].chars().count();
109 let end_char_pos = char_pos + incorrect_lower.chars().count();
110
111 let line_chars: Vec<char> = line.chars().collect();
113 let is_word_start = char_pos == 0
114 || !line_chars
115 .get(char_pos.saturating_sub(1))
116 .unwrap_or(&' ')
117 .is_alphanumeric();
118 let is_word_end = end_char_pos >= line_chars.len()
119 || !line_chars
120 .get(end_char_pos)
121 .unwrap_or(&' ')
122 .is_alphanumeric();
123
124 if is_word_start && is_word_end {
125 let actual_text: String = line_chars[char_pos..end_char_pos].iter().collect();
127
128 if actual_text != *correct {
130 let safe_byte_pos = line
133 .char_indices()
134 .nth(char_pos)
135 .map(|(pos, _)| pos)
136 .unwrap_or(0);
137
138 if !self.is_in_code_span(line, safe_byte_pos)
140 && !self.is_in_url_context(line, safe_byte_pos)
141 {
142 matches.push((safe_byte_pos, actual_text, correct.clone()));
143 }
144 }
145 }
146
147 search_start = absolute_byte_pos + 1;
149 }
150 }
151
152 matches.sort_by_key(|(pos, _, _)| *pos);
154
155 for (pos, actual_text, correct) in matches {
157 violations.push(self.create_violation(
158 format!("Proper name '{actual_text}' should be capitalized as '{correct}'"),
159 line_number,
160 pos + 1, Severity::Warning,
162 ));
163 }
164
165 violations
166 }
167
168 fn is_in_code_span(&self, line: &str, pos: usize) -> bool {
170 let chars: Vec<char> = line.chars().collect();
171 let mut in_code_span = false;
172 let mut i = 0;
173
174 let char_pos = line[..pos.min(line.len())].chars().count();
176
177 while i < chars.len() && i <= char_pos {
178 if chars[i] == '`' {
179 let mut _backtick_count = 0;
181 let _start = i;
182 while i < chars.len() && chars[i] == '`' {
183 _backtick_count += 1;
184 i += 1;
185 }
186
187 if in_code_span {
188 in_code_span = false; } else {
191 in_code_span = true;
192 }
193 } else {
194 i += 1;
195 }
196 }
197
198 in_code_span
199 }
200
201 fn is_in_url_context(&self, line: &str, pos: usize) -> bool {
203 if let Some(url_start) = self.find_url_start(line, pos)
207 && let Some(url_end) = self.find_url_end(line, url_start)
208 {
209 return pos >= url_start && pos < url_end;
210 }
211
212 if let Some(link_url_range) = self.find_markdown_link_url(line, pos) {
214 return pos >= link_url_range.0 && pos < link_url_range.1;
215 }
216
217 false
218 }
219
220 fn find_url_start(&self, line: &str, pos: usize) -> Option<usize> {
222 let schemes = [
223 "https://", "http://", "ftp://", "ftps://", "mailto:", "file://",
224 ];
225
226 for scheme in &schemes {
229 for (char_pos, _) in line.char_indices() {
231 if char_pos > pos {
232 break; }
234
235 if char_pos + scheme.len() <= line.len() {
237 let end_pos = char_pos + scheme.len();
239 if line.is_char_boundary(end_pos) {
240 let slice = &line[char_pos..end_pos];
242 if slice.eq_ignore_ascii_case(scheme) {
243 if let Some(url_end) = self.find_url_end(line, char_pos)
245 && pos >= char_pos
246 && pos < url_end
247 {
248 return Some(char_pos);
249 }
250 }
251 }
252 }
253 }
254 }
255
256 None
257 }
258
259 fn find_url_end(&self, line: &str, url_start: usize) -> Option<usize> {
261 let chars: Vec<char> = line.chars().collect();
262
263 let char_start = line[..url_start.min(line.len())].chars().count();
265 let mut i = char_start;
266
267 while i < chars.len() && chars[i] != ':' {
269 i += 1;
270 }
271 if i < chars.len() && chars[i] == ':' {
272 i += 1;
273 if i + 1 < chars.len() && chars[i] == '/' && chars[i + 1] == '/' {
275 i += 2;
276 }
277 }
278
279 while i < chars.len() {
281 match chars[i] {
282 ' ' | '\t' | '\n' | ')' | ']' | ',' | ';' | '"' | '\'' => break,
284 _ => i += 1,
286 }
287 }
288
289 Some(i)
290 }
291
292 fn find_markdown_link_url(&self, line: &str, pos: usize) -> Option<(usize, usize)> {
294 let chars: Vec<char> = line.chars().collect();
295
296 let char_pos = line[..pos.min(line.len())].chars().count();
298
299 let mut i = if char_pos > 0 { char_pos - 1 } else { 0 };
304 let mut found_paren = false;
305 let mut found_bracket = false;
306
307 while i > 0 {
308 if i < chars.len() && chars[i] == '(' && !found_paren {
309 found_paren = true;
310 } else if i < chars.len() && chars[i] == ']' && found_paren && !found_bracket {
311 found_bracket = true;
312 break;
313 } else if i < chars.len() && (chars[i] == ' ' || chars[i] == '\n') {
314 break;
316 }
317 if i == 0 {
318 break;
319 }
320 i -= 1;
321 }
322
323 if !found_bracket || !found_paren {
324 return None;
325 }
326
327 let mut paren_pos = i + 1;
329 while paren_pos < chars.len() && chars[paren_pos] != '(' {
330 paren_pos += 1;
331 }
332
333 if paren_pos >= chars.len() {
334 return None;
335 }
336
337 let url_start = paren_pos + 1;
339 let mut url_end = url_start;
340 while url_end < chars.len() && chars[url_end] != ')' {
341 url_end += 1;
342 }
343
344 if url_end >= chars.len() {
345 return None;
346 }
347
348 if char_pos >= url_start && char_pos < url_end {
350 Some((url_start, url_end))
351 } else {
352 None
353 }
354 }
355
356 fn get_code_block_ranges(&self, lines: &[&str]) -> Vec<bool> {
358 let mut in_code_block = vec![false; lines.len()];
359 let mut in_fenced_block = false;
360
361 for (i, line) in lines.iter().enumerate() {
362 let trimmed = line.trim();
363
364 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
366 in_fenced_block = !in_fenced_block;
367 in_code_block[i] = true;
368 continue;
369 }
370
371 if in_fenced_block {
372 in_code_block[i] = true;
373 continue;
374 }
375 }
376
377 in_code_block
378 }
379}
380
381impl Default for MD044 {
382 fn default() -> Self {
383 Self::new()
384 }
385}
386
387impl Rule for MD044 {
388 fn id(&self) -> &'static str {
389 "MD044"
390 }
391
392 fn name(&self) -> &'static str {
393 "proper-names"
394 }
395
396 fn description(&self) -> &'static str {
397 "Proper names should have the correct capitalization"
398 }
399
400 fn metadata(&self) -> RuleMetadata {
401 RuleMetadata::stable(RuleCategory::Content).introduced_in("mdbook-lint v0.1.0")
402 }
403
404 fn check_with_ast<'a>(
405 &self,
406 document: &Document,
407 _ast: Option<&'a comrak::nodes::AstNode<'a>>,
408 ) -> Result<Vec<Violation>> {
409 let mut violations = Vec::new();
410 let lines: Vec<&str> = document.content.lines().collect();
411 let in_code_block = self.get_code_block_ranges(&lines);
412
413 for (line_number, line) in lines.iter().enumerate() {
414 let line_number = line_number + 1;
415
416 if in_code_block[line_number - 1] {
418 continue;
419 }
420
421 violations.extend(self.check_line_names(line, line_number));
422 }
423
424 Ok(violations)
425 }
426}
427
428#[cfg(test)]
429mod tests {
430 use super::*;
431 use crate::rule::Rule;
432 use std::path::PathBuf;
433
434 fn create_test_document(content: &str) -> Document {
435 Document::new(content.to_string(), PathBuf::from("test.md")).unwrap()
436 }
437
438 #[test]
439 fn test_md044_correct_capitalization_valid() {
440 let content = r#"This document uses JavaScript and GitHub correctly.
441
442We also use Node.js and MongoDB in our stack.
443
444The API is built with GraphQL and runs on AWS.
445"#;
446
447 let document = create_test_document(content);
448 let rule = MD044::new();
449 let violations = rule.check(&document).unwrap();
450 assert_eq!(violations.len(), 0);
451 }
452
453 #[test]
454 fn test_md044_incorrect_capitalization_violation() {
455 let content = r#"This document uses javascript and github incorrectly.
456
457We also use nodejs and mongodb in our stack.
458"#;
459
460 let document = create_test_document(content);
461 let rule = MD044::new();
462 let violations = rule.check(&document).unwrap();
463 assert_eq!(violations.len(), 4);
464 assert!(violations[0].message.contains("javascript"));
465 assert!(violations[0].message.contains("JavaScript"));
466 assert!(violations[1].message.contains("github"));
467 assert!(violations[1].message.contains("GitHub"));
468 assert!(violations[2].message.contains("nodejs"));
469 assert!(violations[2].message.contains("Node.js"));
470 assert!(violations[3].message.contains("mongodb"));
471 assert!(violations[3].message.contains("MongoDB"));
472 }
473
474 #[test]
475 fn test_md044_mixed_correct_incorrect() {
476 let content = r#"We use JavaScript (correct) but also javascript (incorrect).
477
478GitHub is right, but github is wrong.
479"#;
480
481 let document = create_test_document(content);
482 let rule = MD044::new();
483 let violations = rule.check(&document).unwrap();
484 assert_eq!(violations.len(), 2);
485 assert!(violations[0].message.contains("javascript"));
486 assert!(violations[1].message.contains("github"));
487 }
488
489 #[test]
490 fn test_md044_code_blocks_ignored() {
491 let content = r#"We use JavaScript in our application.
492
493```javascript
494// This javascript in code should be ignored
495console.log("github");
496```
497
498But javascript outside code blocks should be flagged.
499"#;
500
501 let document = create_test_document(content);
502 let rule = MD044::new();
503 let violations = rule.check(&document).unwrap();
504 assert_eq!(violations.len(), 1);
505 assert_eq!(violations[0].line, 8);
506 }
507
508 #[test]
509 fn test_md044_code_spans_ignored() {
510 let content = r#"We use JavaScript, and in code we write `javascript` or `github.com`.
511
512But javascript outside of `code spans` should be flagged.
513"#;
514
515 let document = create_test_document(content);
516 let rule = MD044::new();
517 let violations = rule.check(&document).unwrap();
518 assert_eq!(violations.len(), 1);
519 assert_eq!(violations[0].line, 3);
520 }
521
522 #[test]
523 fn test_md044_custom_names() {
524 let content = r#"We use mycompany products and someapi.
525
526This should flag mycompany and someapi.
527"#;
528
529 let mut custom_names = HashMap::new();
530 custom_names.insert("mycompany".to_string(), "MyCompany".to_string());
531 custom_names.insert("someapi".to_string(), "SomeAPI".to_string());
532
533 let document = create_test_document(content);
534 let rule = MD044::with_names(custom_names);
535 let violations = rule.check(&document).unwrap();
536 assert_eq!(violations.len(), 4); assert!(violations[0].message.contains("MyCompany"));
538 assert!(violations[1].message.contains("SomeAPI"));
539 }
540
541 #[test]
542 fn test_md044_word_boundaries() {
543 let content = r#"The word javascript should be flagged.
544
545But javascriptlike should not be flagged (it's a different word).
546
547And notjavascript should also not be flagged.
548"#;
549
550 let document = create_test_document(content);
551 let rule = MD044::new();
552 let violations = rule.check(&document).unwrap();
553 assert_eq!(violations.len(), 1);
554 assert_eq!(violations[0].line, 1);
555 }
556
557 #[test]
558 fn test_md044_case_insensitive_matching() {
559 let content = r#"We use Javascript, JAVASCRIPT, and JaVaScRiPt.
560
561All variations should be flagged.
562"#;
563
564 let document = create_test_document(content);
565 let rule = MD044::new();
566 let violations = rule.check(&document).unwrap();
567 assert_eq!(violations.len(), 3);
568 assert!(violations[0].message.contains("Javascript"));
569 assert!(violations[1].message.contains("JAVASCRIPT"));
570 assert!(violations[2].message.contains("JaVaScRiPt"));
571 }
572
573 #[test]
574 fn test_md044_multiple_occurrences_per_line() {
575 let content = r#"Using javascript and github and nodejs in the same line.
576"#;
577
578 let document = create_test_document(content);
579 let rule = MD044::new();
580 let violations = rule.check(&document).unwrap();
581 assert_eq!(violations.len(), 3);
582 assert!(violations[0].message.contains("javascript"));
583 assert!(violations[1].message.contains("github"));
584 assert!(violations[2].message.contains("nodejs"));
585 }
586
587 #[test]
588 fn test_md044_no_proper_names() {
589 let content = r#"This document doesn't contain any configured proper names.
590
591Just regular words and sentences here.
592
593Nothing to flag in this content.
594"#;
595
596 let document = create_test_document(content);
597 let rule = MD044::new();
598 let violations = rule.check(&document).unwrap();
599 assert_eq!(violations.len(), 0);
600 }
601
602 #[test]
603 fn test_md044_acronyms() {
604 let content = r#"We use api, rest, and json in our application.
605
606These should be API, REST, and JSON.
607"#;
608
609 let document = create_test_document(content);
610 let rule = MD044::new();
611 let violations = rule.check(&document).unwrap();
612 assert_eq!(violations.len(), 3); assert!(violations[0].message.contains("API"));
614 assert!(violations[1].message.contains("REST"));
615 assert!(violations[2].message.contains("JSON"));
616 }
617
618 #[test]
619 fn test_md044_multi_word_names() {
620 let content = r#"We deploy to google cloud platform.
621
622Should be Google Cloud not google cloud.
623"#;
624
625 let document = create_test_document(content);
626 let rule = MD044::new();
627 let violations = rule.check(&document).unwrap();
628 assert_eq!(violations.len(), 2);
629 assert!(violations[0].message.contains("google cloud"));
630 assert!(violations[1].message.contains("google cloud"));
631 }
632
633 #[test]
634 fn test_md044_url_false_positives() {
635 let content = r#"Check out our repository at https://github.com/user/repo.
636
637You can also visit http://example.com for more info.
638
639Visit https://crates.io/crates/mdbook-lint for the package.
640
641But github should still be flagged when not in URLs.
642And https should be flagged when used as HTTPS protocol name.
643"#;
644
645 let document = create_test_document(content);
646 let rule = MD044::new();
647 let violations = rule.check(&document).unwrap();
648
649 println!("Violations found after fix: {}", violations.len());
654 for (i, v) in violations.iter().enumerate() {
655 println!("Violation {}: line {}, {}", i, v.line, v.message);
656 }
657
658 assert_eq!(violations.len(), 2);
660 assert_eq!(violations[0].line, 7); assert_eq!(violations[1].line, 8); assert!(violations[0].message.contains("github"));
663 assert!(violations[1].message.contains("https"));
664 }
665
666 #[test]
667 fn test_md044_markdown_links_with_urls() {
668 let content = r#"Check out [GitHub](https://github.com) for repositories.
669
670Visit [the documentation](http://docs.example.com) for more info.
671
672Also see [Crates.io](https://crates.io) for Rust packages.
673
674But github and http should be flagged in regular text.
675"#;
676
677 let document = create_test_document(content);
678 let rule = MD044::new();
679 let violations = rule.check(&document).unwrap();
680
681 println!("Markdown link violations found: {}", violations.len());
683 for (i, v) in violations.iter().enumerate() {
684 println!("Violation {}: line {}, {}", i, v.line, v.message);
685 }
686
687 assert_eq!(violations.len(), 2);
689 assert_eq!(violations[0].line, 7); assert_eq!(violations[1].line, 7);
691 assert!(violations[0].message.contains("github") || violations[0].message.contains("http"));
692 assert!(violations[1].message.contains("github") || violations[1].message.contains("http"));
693 }
694
695 #[test]
696 fn test_md044_bare_urls() {
697 let content = r#"Visit https://github.com/user/repo directly.
698
699Or go to http://example.com for info.
700
701Plain URLs: https://crates.io and http://docs.rs should not be flagged.
702
703But mentioning github or https in text should be flagged.
704"#;
705
706 let document = create_test_document(content);
707 let rule = MD044::new();
708 let violations = rule.check(&document).unwrap();
709
710 println!("Bare URL violations found: {}", violations.len());
711 for (i, v) in violations.iter().enumerate() {
712 println!("Violation {}: line {}, {}", i, v.line, v.message);
713 }
714
715 assert_eq!(violations.len(), 2);
717 assert_eq!(violations[0].line, 7); assert_eq!(violations[1].line, 7);
719 assert!(
720 violations[0].message.contains("github") || violations[0].message.contains("https")
721 );
722 assert!(
723 violations[1].message.contains("github") || violations[1].message.contains("https")
724 );
725 }
726
727 #[test]
728 fn test_md044_url_context_detection_comprehensive() {
729 let content = r#"# URL Context Detection Tests
730
731## Bare URLs should not be flagged
732Visit https://github.com/user/repo for code.
733Check out http://example.com/path?query=value.
734Email me at mailto:user@github.com for questions.
735Use ftp://files.example.com/downloads for files.
736
737## Markdown links should not flag URLs
738See [GitHub](https://github.com) for repositories.
739Check [HTTP docs](http://example.com/docs) for info.
740Visit [the site](https://crates.io/search?q=rust) for packages.
741
742## Regular text should still be flagged
743I use github for version control.
744The https protocol is secure.
745We need better http handling.
746
747## Mixed scenarios
748Check https://github.com but remember that github is popular.
749Visit [GitHub](https://github.com) - github is widely used.
750The url https://example.com shows that http redirects work.
751
752## Edge cases
753URL at end: https://github.com
754URL in parentheses: (https://github.com/user/repo)
755URL with punctuation: Visit https://github.com.
756Multiple URLs: https://github.com and http://example.com are different.
757"#;
758
759 let document = create_test_document(content);
760 let rule = MD044::new();
761 let violations = rule.check(&document).unwrap();
762
763 println!("Comprehensive test violations: {}", violations.len());
764 for (i, v) in violations.iter().enumerate() {
765 println!(
766 "Violation {}: line {}, col {}, {}",
767 i, v.line, v.column, v.message
768 );
769 }
770
771 assert_eq!(violations.len(), 7);
782
783 for violation in &violations {
785 assert!(violation.line >= 15); }
787 }
788
789 #[test]
790 fn test_md044_url_detection_methods() {
791 let rule = MD044::new();
792
793 assert!(rule.is_in_url_context("Visit https://github.com for code", 10)); assert!(rule.is_in_url_context("Visit https://github.com for code", 17)); assert!(!rule.is_in_url_context("Visit https://github.com for code", 30)); assert!(rule.is_in_url_context("See [GitHub](https://github.com) here", 14)); assert!(rule.is_in_url_context("See [GitHub](https://github.com) here", 21)); assert!(!rule.is_in_url_context("See [GitHub](https://github.com) here", 4)); assert!(!rule.is_in_url_context("See [GitHub](https://github.com) here", 34)); assert!(!rule.is_in_url_context("I use github for development", 6)); assert!(!rule.is_in_url_context("The https protocol is secure", 4)); }
809
810 #[test]
811 fn test_md044_unicode_emoji_handling() {
812 let content = r#"📖 javascript documentation and github 🚀 repositories are great.
813
814Using nodejs with 🔥 performance and mongodb 💾 storage.
815"#;
816
817 let document = create_test_document(content);
818 let rule = MD044::new();
819 let violations = rule.check(&document).unwrap();
820
821 assert_eq!(violations.len(), 4);
823 assert!(violations[0].message.contains("javascript"));
824 assert!(violations[1].message.contains("github"));
825 assert!(violations[2].message.contains("nodejs"));
826 assert!(violations[3].message.contains("mongodb"));
827 }
828
829 #[test]
830 fn test_md044_unicode_mixed_scripts() {
831 let content = r#"在中文文档中使用 javascript 和 github。
832
833Русский текст с javascript и github тоже должен работать.
834
835العربية مع javascript و github أيضاً.
836"#;
837
838 let document = create_test_document(content);
839 let rule = MD044::new();
840 let violations = rule.check(&document).unwrap();
841
842 assert!(violations.len() >= 4); for violation in &violations {
846 assert!(
847 violation.message.contains("javascript") || violation.message.contains("github")
848 );
849 }
850 }
851
852 #[test]
853 fn test_md044_unicode_case_folding() {
854 let content = r#"Using javascript in our project.
855
856İstanbul'da javascript kullanıyoruz.
857"#;
858
859 let document = create_test_document(content);
860 let rule = MD044::new();
861 let violations = rule.check(&document).unwrap();
862
863 assert!(!violations.is_empty()); let js_violation = violations.iter().find(|v| v.message.contains("javascript"));
868 assert!(js_violation.is_some());
869 }
870
871 #[test]
872 fn test_md044_unicode_combining_characters() {
873 let content = r#"Using normal javascript here and also github.
874
875Testing regular javascript and github again.
876"#;
877
878 let document = create_test_document(content);
879 let rule = MD044::new();
880 let violations = rule.check(&document).unwrap();
881
882 assert_eq!(violations.len(), 4);
884 let js_violations: Vec<_> = violations
885 .iter()
886 .filter(|v| v.message.contains("javascript"))
887 .collect();
888 let gh_violations: Vec<_> = violations
889 .iter()
890 .filter(|v| v.message.contains("github"))
891 .collect();
892
893 assert_eq!(js_violations.len(), 2);
894 assert_eq!(gh_violations.len(), 2);
895 }
896
897 #[test]
898 fn test_md044_unicode_word_boundaries() {
899 let content = r#"Testing javascript🔥fast and github⭐popular.
900
901Also javascript‿linked and github🌟awesome.
902"#;
903
904 let document = create_test_document(content);
905 let rule = MD044::new();
906 let violations = rule.check(&document).unwrap();
907
908 assert_eq!(violations.len(), 4);
910 assert!(violations.iter().any(|v| v.message.contains("javascript")));
911 assert!(violations.iter().any(|v| v.message.contains("github")));
912 }
913
914 #[test]
915 fn test_md044_unicode_urls_with_emoji() {
916 let content = r#"Visit 📖 https://github.com/user/repo 🚀 for documentation.
917
918Check https://javascript.info 💡 for learning resources.
919
920But standalone github and javascript should be flagged.
921"#;
922
923 let document = create_test_document(content);
924 let rule = MD044::new();
925 let violations = rule.check(&document).unwrap();
926
927 assert_eq!(violations.len(), 2);
929 assert_eq!(violations[0].line, 5); assert_eq!(violations[1].line, 5);
931 assert!(
932 violations[0].message.contains("github")
933 || violations[0].message.contains("javascript")
934 );
935 assert!(
936 violations[1].message.contains("github")
937 || violations[1].message.contains("javascript")
938 );
939 }
940
941 #[test]
942 fn test_md044_still_works_for_non_urls() {
943 let content = r#"We use javascript and github in our development.
944
945The api uses json for data exchange.
946
947These should all be flagged since they're not in URLs.
948"#;
949
950 let document = create_test_document(content);
951 let rule = MD044::new();
952 let violations = rule.check(&document).unwrap();
953
954 assert_eq!(violations.len(), 4);
956 assert!(violations[0].message.contains("javascript"));
957 assert!(violations[1].message.contains("github"));
958 assert!(violations[2].message.contains("api"));
959 assert!(violations[3].message.contains("json"));
960 }
961}