rumdl_lib/utils/
regex_cache.rs1use fancy_regex::Regex as FancyRegex;
23use regex::Regex;
24use std::collections::HashMap;
25use std::sync::LazyLock;
26use std::sync::{Arc, Mutex};
27
28#[derive(Debug)]
30pub struct RegexCache {
31 cache: HashMap<String, Arc<Regex>>,
32 usage_stats: HashMap<String, u64>,
33}
34
35impl Default for RegexCache {
36 fn default() -> Self {
37 Self::new()
38 }
39}
40
41impl RegexCache {
42 pub fn new() -> Self {
43 Self {
44 cache: HashMap::new(),
45 usage_stats: HashMap::new(),
46 }
47 }
48
49 pub fn get_regex(&mut self, pattern: &str) -> Result<Arc<Regex>, regex::Error> {
51 if let Some(regex) = self.cache.get(pattern) {
52 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
53 return Ok(regex.clone());
54 }
55
56 let regex = Arc::new(Regex::new(pattern)?);
57 self.cache.insert(pattern.to_string(), regex.clone());
58 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
59 Ok(regex)
60 }
61
62 pub fn get_stats(&self) -> HashMap<String, u64> {
64 self.usage_stats.clone()
65 }
66
67 pub fn clear(&mut self) {
69 self.cache.clear();
70 self.usage_stats.clear();
71 }
72}
73
74static GLOBAL_REGEX_CACHE: LazyLock<Arc<Mutex<RegexCache>>> = LazyLock::new(|| Arc::new(Mutex::new(RegexCache::new())));
76
77pub fn get_cached_regex(pattern: &str) -> Result<Arc<Regex>, regex::Error> {
83 let mut cache = GLOBAL_REGEX_CACHE.lock().unwrap_or_else(|poisoned| {
84 let mut guard = poisoned.into_inner();
86 guard.clear();
87 guard
88 });
89 cache.get_regex(pattern)
90}
91
92pub fn get_cache_stats() -> HashMap<String, u64> {
96 match GLOBAL_REGEX_CACHE.lock() {
97 Ok(cache) => cache.get_stats(),
98 Err(_) => HashMap::new(),
99 }
100}
101
/// Expands to a `&'static regex::Regex` that is compiled once, on first use, at the
/// call site.
///
/// Each expansion declares its own hidden `static`, so two invocations never share
/// a compiled regex even when given the same pattern.
///
/// `LazyLock` is referenced by its full path because `macro_rules!` does not resolve
/// item paths at the definition site: with a bare `LazyLock`, every caller would have
/// to import `std::sync::LazyLock` itself.
///
/// # Panics
///
/// Panics on first use if `$pattern` is not a valid regex.
#[macro_export]
macro_rules! regex_lazy {
    ($pattern:expr) => {{
        static REGEX: ::std::sync::LazyLock<regex::Regex> =
            ::std::sync::LazyLock::new(|| regex::Regex::new($pattern).unwrap());
        &*REGEX
    }};
}
126
/// Expands to a lookup of `$pattern` in the global regex cache via
/// `get_cached_regex`, yielding the cached compiled regex.
///
/// # Panics
///
/// Panics with "Failed to compile regex" if the pattern does not compile.
#[macro_export]
macro_rules! regex_cached {
    ($pattern:expr) => {{
        let compiled = $crate::utils::regex_cache::get_cached_regex($pattern);
        compiled.expect("Failed to compile regex")
    }};
}
137
138pub use crate::regex_lazy;
140
/// Pattern source for URLs with an explicit `http`, `https`, `ftp` or `ftps` scheme.
///
/// The host is either a bracketed literal (IPv6 style) or a run of characters that
/// excludes whitespace, brackets, parentheses, quotes, backticks and `/`. Port, path,
/// query and fragment are each optional; the path, query and fragment character
/// classes do not exclude parentheses.
pub const URL_STANDARD_STR: &str = concat!(
    // Scheme and `://` separator.
    r#"(?:https?|ftps?|ftp)://"#,
    // Host: bracketed literal or plain host characters.
    r#"(?:"#,
    r#"\[[0-9a-fA-F:%.\-a-zA-Z]+\]"#,
    r#"|"#,
    r#"[^\s<>\[\]()\\'\"`/]+"#,
    r#")"#,
    // Optional port.
    r#"(?::\d+)?"#,
    // Optional path.
    r#"(?:/[^\s<>\[\]\\'\"`]*)?"#,
    // Optional query string.
    r#"(?:\?[^\s<>\[\]\\'\"`]*)?"#,
    // Optional fragment.
    r#"(?:#[^\s<>\[\]\\'\"`]*)?"#,
);
186
/// Pattern source for scheme-less URLs that start with `www.`.
///
/// Requires at least one dot-terminated label after `www.` and a final alphabetic
/// label of two or more letters; port, path, query and fragment are optional.
pub const URL_WWW_STR: &str = concat!(
    // `www.` followed by one or more labels and an alphabetic final label.
    r#"www\.(?:[a-zA-Z0-9][-a-zA-Z0-9]*\.)+[a-zA-Z]{2,}"#,
    // Optional port.
    r#"(?::\d+)?"#,
    // Optional path.
    r#"(?:/[^\s<>\[\]\\'\"`]*)?"#,
    // Optional query string.
    r#"(?:\?[^\s<>\[\]\\'\"`]*)?"#,
    // Optional fragment.
    r#"(?:#[^\s<>\[\]\\'\"`]*)?"#,
);
206
/// Pattern source for URLs whose host is a bracketed literal such as `[::1]`.
///
/// The bracket contents accept hex digits, `:`, `.`, `%`, `-` and ASCII letters;
/// port, path, query and fragment are optional.
pub const URL_IPV6_STR: &str = concat!(
    // Scheme and `://` separator.
    r#"(?:https?|ftps?|ftp)://"#,
    // Bracketed host literal.
    r#"\[[0-9a-fA-F:%.\-a-zA-Z]+\]"#,
    // Optional port.
    r#"(?::\d+)?"#,
    // Optional path.
    r#"(?:/[^\s<>\[\]\\'\"`]*)?"#,
    // Optional query string.
    r#"(?:\?[^\s<>\[\]\\'\"`]*)?"#,
    // Optional fragment.
    r#"(?:#[^\s<>\[\]\\'\"`]*)?"#,
);
219
/// Pattern source for `xmpp:` URIs of the form `xmpp:user@host.tld` with an
/// optional `/`-prefixed trailing part.
pub const XMPP_URI_STR: &str = concat!(
    // Scheme prefix.
    r#"xmpp:"#,
    // `local@domain.tld` address.
    r#"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"#,
    // Optional `/`-prefixed suffix.
    r#"(?:/[^\s<>\[\]\\'\"`]*)?"#,
);
229
/// Pattern source for a cheap pre-filter: matches if the text contains a
/// `scheme://` prefix, `xmpp:`, an `@`, or `www.`.
pub const URL_QUICK_CHECK_STR: &str = concat!(
    // Any supported scheme followed by `://`.
    r#"(?:https?|ftps?|ftp|xmpp)://"#,
    // Bare `xmpp:` prefix.
    r#"|xmpp:"#,
    // `@` sign.
    r#"|@"#,
    // Scheme-less `www.` prefix.
    r#"|www\."#,
);
236
/// Pattern source for a simplified URL matcher: a scheme, then a run of characters
/// that are neither whitespace nor angle brackets, ending on a character that is
/// additionally neither `.` nor `,`.
pub const URL_SIMPLE_STR: &str = concat!(
    // Scheme and `://` separator.
    r#"(?:https?|ftps?|ftp)://"#,
    // Body of the URL.
    r#"[^\s<>]+"#,
    // Final character: excludes `.` and `,` as well.
    r#"[^\s<>.,]"#,
);
243
244pub static URL_STANDARD_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(URL_STANDARD_STR).unwrap());
249
250pub static URL_WWW_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(URL_WWW_STR).unwrap());
253
254pub static URL_IPV6_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(URL_IPV6_STR).unwrap());
257
258pub static URL_QUICK_CHECK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(URL_QUICK_CHECK_STR).unwrap());
261
262pub static URL_SIMPLE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(URL_SIMPLE_STR).unwrap());
265
266pub static URL_PATTERN: LazyLock<Regex> = LazyLock::new(|| URL_SIMPLE_REGEX.clone());
268
269pub static XMPP_URI_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(XMPP_URI_STR).unwrap());
272
273pub static ATX_HEADING_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(#{1,6})(\s+|$)").unwrap());
275
276pub static UNORDERED_LIST_MARKER_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)([*+-])(\s+)").unwrap());
278pub static ORDERED_LIST_MARKER_REGEX: LazyLock<Regex> =
279 LazyLock::new(|| Regex::new(r"^(\s*)(\d+)([.)])(\s+)").unwrap());
280
281pub static ASTERISK_EMPHASIS: LazyLock<Regex> =
287 LazyLock::new(|| Regex::new(r"(?:^|[^*])\*(\s+[^*]+\s*|\s*[^*]+\s+)\*(?:[^*]|$)").unwrap());
288pub static UNDERSCORE_EMPHASIS: LazyLock<Regex> =
289 LazyLock::new(|| Regex::new(r"(?:^|[^_])_(\s+[^_]+\s*|\s*[^_]+\s+)_(?:[^_]|$)").unwrap());
290pub static DOUBLE_UNDERSCORE_EMPHASIS: LazyLock<Regex> =
291 LazyLock::new(|| Regex::new(r"(?:^|[^_])__(\s+[^_]+\s*|\s*[^_]+\s+)__(?:[^_]|$)").unwrap());
292pub static FENCED_CODE_BLOCK_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap());
294pub static FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```\s*$").unwrap());
295
296pub static HTML_TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<([a-zA-Z][^>]*)>").unwrap());
298pub static HTML_TAG_QUICK_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i)</?[a-zA-Z]").unwrap());
299
300pub static IMAGE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap());
302
303pub static BLOCKQUOTE_PREFIX_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
305
/// Returns `true` when `line` is blank once any blockquote markers are ignored.
///
/// A line counts as blank if it consists solely of whitespace and `>` characters
/// in the shape of repeated blockquote prefixes (`">"`, `"> "`, `">> "`, `"> > "`,
/// `"  >"`, ...), or if it is empty / whitespace-only to begin with.
///
/// The scan mirrors repeated application of `BLOCKQUOTE_PREFIX_RE`
/// (`^(\s*>+\s*)`): skip whitespace, require at least one `>`, skip the whole run of
/// `>`, and repeat. It is written as a loop over plain string operations so that a
/// line with a very large number of markers cannot exhaust the stack through
/// recursion.
pub fn is_blank_in_blockquote_context(line: &str) -> bool {
    let mut rest = line;
    loop {
        // Whitespace before a marker (and after the previous one) is insignificant.
        rest = rest.trim_start();
        if rest.is_empty() {
            return true;
        }

        // Anything other than a `>` at this point is real content.
        let after_markers = rest.trim_start_matches('>');
        if after_markers.len() == rest.len() {
            return false;
        }
        rest = after_markers;
    }
}
342
343pub static IMAGE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^!\[.*?\]\[.*?\]$").unwrap());
345pub static LINK_REF_PATTERN: LazyLock<Regex> =
346 LazyLock::new(|| Regex::new(r#"^\[.*?\]:\s*\S+(\s+["'(].*)?\s*$"#).unwrap());
347pub static ABBREVIATION: LazyLock<Regex> = LazyLock::new(|| {
348 Regex::new(r"\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|Inc|Corp|Ltd|Co|St|Ave|Blvd|Rd|Ph\.D|M\.D|B\.A|M\.A|Ph\.D|U\.S|U\.K|U\.N|N\.Y|L\.A|D\.C)\.\s+[A-Z]").unwrap()
349});
350pub static LIST_ITEM: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\d+\.\s+").unwrap());
351
352pub static EMAIL_PATTERN: LazyLock<Regex> =
354 LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
355
356pub static REF_LINK_REGEX: LazyLock<FancyRegex> =
360 LazyLock::new(|| FancyRegex::new(r"(?<!\\)\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap());
361
362pub static SHORTCUT_REF_REGEX: LazyLock<FancyRegex> =
367 LazyLock::new(|| FancyRegex::new(r"(?<![\\)\]])\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\](?!\s*[\[\(])").unwrap());
368
369pub static INLINE_LINK_FANCY_REGEX: LazyLock<FancyRegex> =
371 LazyLock::new(|| FancyRegex::new(r"(?<!\\)\[([^\]]+)\]\(([^)]+)\)").unwrap());
372
373pub static INLINE_IMAGE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap());
375
376pub static LINKED_IMAGE_INLINE_INLINE: LazyLock<Regex> =
384 LazyLock::new(|| Regex::new(r"\[!\[([^\]]*)\]\(([^)]+)\)\]\(([^)]+)\)").unwrap());
385
386pub static LINKED_IMAGE_REF_INLINE: LazyLock<Regex> =
388 LazyLock::new(|| Regex::new(r"\[!\[([^\]]*)\]\[([^\]]*)\]\]\(([^)]+)\)").unwrap());
389
390pub static LINKED_IMAGE_INLINE_REF: LazyLock<Regex> =
392 LazyLock::new(|| Regex::new(r"\[!\[([^\]]*)\]\(([^)]+)\)\]\[([^\]]*)\]").unwrap());
393
394pub static LINKED_IMAGE_REF_REF: LazyLock<Regex> =
396 LazyLock::new(|| Regex::new(r"\[!\[([^\]]*)\]\[([^\]]*)\]\]\[([^\]]*)\]").unwrap());
397
398pub static REF_IMAGE_REGEX: LazyLock<Regex> =
400 LazyLock::new(|| Regex::new(r"!\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap());
401
402pub static FOOTNOTE_REF_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[\^([^\]]+)\]").unwrap());
404
405pub static WIKI_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[\[([^\]]+)\]\]").unwrap());
407
408pub static INLINE_MATH_REGEX: LazyLock<FancyRegex> =
410 LazyLock::new(|| FancyRegex::new(r"(?<!\$)\$(?!\$)([^\$]+)\$(?!\$)").unwrap());
411pub static DISPLAY_MATH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$([^\$]+)\$\$").unwrap());
412
413pub static EMOJI_SHORTCODE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r":([a-zA-Z0-9_+-]+):").unwrap());
415
416pub static HTML_TAG_PATTERN: LazyLock<Regex> =
418 LazyLock::new(|| Regex::new(r"</?[a-zA-Z][^>]*>|<[a-zA-Z][^>]*/\s*>").unwrap());
419
420pub static HTML_ENTITY_REGEX: LazyLock<Regex> =
422 LazyLock::new(|| Regex::new(r"&[a-zA-Z][a-zA-Z0-9]*;|&#\d+;|&#x[0-9a-fA-F]+;").unwrap());
423
424pub static HUGO_SHORTCODE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\{[<%][\s\S]*?[%>]\}\}").unwrap());
428
429pub static HTML_COMMENT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<!--[\s\S]*?-->").unwrap());
431
432pub static HTML_HEADING_PATTERN: LazyLock<FancyRegex> =
435 LazyLock::new(|| FancyRegex::new(r"^\s*<h([1-6])(?:\s[^>]*)?>.*</h\1>\s*$").unwrap());
436
/// Returns `s` with a backslash inserted before each of the characters
/// `. + * ? ^ $ ( ) [ ] { } | \`; every other character is copied through unchanged.
pub fn escape_regex(s: &str) -> String {
    /// Characters that receive a leading backslash.
    const ESCAPED: &[char] = &[
        '.', '+', '*', '?', '^', '$', '(', ')', '[', ']', '{', '}', '|', '\\',
    ];

    // Worst case every character is escaped, doubling the length.
    s.chars()
        .fold(String::with_capacity(s.len() * 2), |mut escaped, ch| {
            if ESCAPED.contains(&ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
454
#[cfg(test)]
mod tests {
    use super::*;

    // ----- RegexCache ------------------------------------------------------

    #[test]
    fn test_regex_cache_new() {
        let cache = RegexCache::new();
        assert!(cache.cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_regex_cache_default() {
        let cache = RegexCache::default();
        assert!(cache.cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_get_regex_compilation() {
        let mut cache = RegexCache::new();

        // First call compiles and caches the pattern.
        let regex1 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&1));

        // Second call is served from the cache and only bumps the counter.
        let regex2 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_regex(r"[unterminated");
        assert!(result.is_err());
        assert!(cache.cache.is_empty());
    }

    #[test]
    fn test_get_stats() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\w+").unwrap();

        let stats = cache.get_stats();
        assert_eq!(stats.get(r"\d+"), Some(&2));
        assert_eq!(stats.get(r"\w+"), Some(&1));
    }

    #[test]
    fn test_clear_cache() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();

        assert!(!cache.cache.is_empty());
        assert!(!cache.usage_stats.is_empty());

        cache.clear();

        assert!(cache.cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    // ----- Global cache and macros -----------------------------------------

    #[test]
    fn test_global_cache_functions() {
        let regex1 = get_cached_regex(r"\d{3}").unwrap();
        let regex2 = get_cached_regex(r"\d{3}").unwrap();
        assert!(Arc::ptr_eq(&regex1, &regex2));

        let stats = get_cache_stats();
        assert!(stats.contains_key(r"\d{3}"));
    }

    #[test]
    fn test_regex_lazy_macro() {
        let re = regex_lazy!(r"^test.*end$");
        assert!(re.is_match("test something end"));
        assert!(!re.is_match("test something"));

        // A second expansion gets its own independent static.
        let re2 = regex_lazy!(r"^start.*finish$");
        assert!(re2.is_match("start and finish"));
        assert!(!re2.is_match("start without end"));
    }

    // ----- escape_regex ----------------------------------------------------

    #[test]
    fn test_escape_regex() {
        assert_eq!(escape_regex("a.b"), "a\\.b");
        assert_eq!(escape_regex("a+b*c"), "a\\+b\\*c");
        assert_eq!(escape_regex("(test)"), "\\(test\\)");
        assert_eq!(escape_regex("[a-z]"), "\\[a-z\\]");
        assert_eq!(escape_regex("normal text"), "normal text");

        assert_eq!(escape_regex(".$^{[(|)*+?\\"), "\\.\\$\\^\\{\\[\\(\\|\\)\\*\\+\\?\\\\");

        assert_eq!(escape_regex(""), "");

        assert_eq!(escape_regex("test.com/path?query=1"), "test\\.com/path\\?query=1");
    }

    // ----- Static patterns -------------------------------------------------

    #[test]
    fn test_static_regex_patterns() {
        assert!(URL_SIMPLE_REGEX.is_match("https://example.com"));
        assert!(URL_SIMPLE_REGEX.is_match("http://test.org/path"));
        assert!(URL_SIMPLE_REGEX.is_match("ftp://files.com"));
        assert!(!URL_SIMPLE_REGEX.is_match("not a url"));

        assert!(ATX_HEADING_REGEX.is_match("# Heading"));
        assert!(ATX_HEADING_REGEX.is_match(" ## Indented"));
        assert!(ATX_HEADING_REGEX.is_match("### "));
        assert!(!ATX_HEADING_REGEX.is_match("Not a heading"));

        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("* Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("- Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("+ Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("1. Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("99. Item"));

        assert!(HTML_TAG_REGEX.is_match("<div>"));
        assert!(HTML_TAG_REGEX.is_match("<span class='test'>"));

        assert!(BLOCKQUOTE_PREFIX_RE.is_match("> Quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(" > Indented quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(">> Nested"));
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        let handles: Vec<_> = (0..10)
            .map(|i| {
                thread::spawn(move || {
                    let pattern = format!(r"\d{{{i}}}");
                    let regex = get_cached_regex(&pattern).unwrap();
                    assert!(regex.is_match(&"1".repeat(i)));
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }
    }

    // ----- URL_STANDARD_REGEX ----------------------------------------------

    #[test]
    fn test_url_standard_basic() {
        assert!(URL_STANDARD_REGEX.is_match("https://example.com"));
        assert!(URL_STANDARD_REGEX.is_match("http://example.com"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/path"));
        assert!(URL_STANDARD_REGEX.is_match("ftp://files.example.com"));
        assert!(URL_STANDARD_REGEX.is_match("ftps://secure.example.com"));

        // A scheme is required.
        assert!(!URL_STANDARD_REGEX.is_match("not a url"));
        assert!(!URL_STANDARD_REGEX.is_match("example.com"));
        assert!(!URL_STANDARD_REGEX.is_match("www.example.com"));
    }

    #[test]
    fn test_url_standard_with_path() {
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/path/to/page"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/path/to/page.html"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/path/to/page/"));
    }

    #[test]
    fn test_url_standard_with_query() {
        assert!(URL_STANDARD_REGEX.is_match("https://example.com?query=value"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/path?query=value"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/path?a=1&b=2"));
    }

    #[test]
    fn test_url_standard_with_fragment() {
        assert!(URL_STANDARD_REGEX.is_match("https://example.com#section"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/path#section"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com/path?query=value#section"));
    }

    #[test]
    fn test_url_standard_with_port() {
        assert!(URL_STANDARD_REGEX.is_match("https://example.com:8080"));
        assert!(URL_STANDARD_REGEX.is_match("https://example.com:443/path"));
        assert!(URL_STANDARD_REGEX.is_match("http://localhost:3000"));
        assert!(URL_STANDARD_REGEX.is_match("https://192.168.1.1:8080/path"));
    }

    #[test]
    fn test_url_standard_wikipedia_style_parentheses() {
        let url = "https://en.wikipedia.org/wiki/Rust_(programming_language)";
        assert!(URL_STANDARD_REGEX.is_match(url));

        // The whole URL, parentheses included, is part of the match.
        let cap = URL_STANDARD_REGEX.find(url).unwrap();
        assert_eq!(cap.as_str(), url);

        let url2 = "https://example.com/path_(foo)_(bar)";
        let cap2 = URL_STANDARD_REGEX.find(url2).unwrap();
        assert_eq!(cap2.as_str(), url2);
    }

    #[test]
    fn test_url_standard_ipv6() {
        assert!(URL_STANDARD_REGEX.is_match("https://[::1]/path"));
        assert!(URL_STANDARD_REGEX.is_match("https://[2001:db8::1]:8080/path"));
        assert!(URL_STANDARD_REGEX.is_match("http://[fe80::1%eth0]/"));
    }

    // ----- URL_WWW_REGEX ---------------------------------------------------

    #[test]
    fn test_url_www_basic() {
        assert!(URL_WWW_REGEX.is_match("www.example.com"));
        assert!(URL_WWW_REGEX.is_match("www.example.co.uk"));
        assert!(URL_WWW_REGEX.is_match("www.sub.example.com"));

        assert!(!URL_WWW_REGEX.is_match("example.com"));

        // The pattern is unanchored, so it also matches the `www.` part of a full URL.
        assert!(URL_WWW_REGEX.is_match("https://www.example.com"));
    }

    #[test]
    fn test_url_www_with_path() {
        assert!(URL_WWW_REGEX.is_match("www.example.com/path"));
        assert!(URL_WWW_REGEX.is_match("www.example.com/path/to/page"));
        assert!(URL_WWW_REGEX.is_match("www.example.com/path_(with_parens)"));
    }

    // ----- URL_IPV6_REGEX --------------------------------------------------

    #[test]
    fn test_url_ipv6_basic() {
        assert!(URL_IPV6_REGEX.is_match("https://[::1]/"));
        assert!(URL_IPV6_REGEX.is_match("http://[2001:db8::1]/path"));
        assert!(URL_IPV6_REGEX.is_match("https://[fe80::1]:8080/path"));
        assert!(URL_IPV6_REGEX.is_match("ftp://[::ffff:192.168.1.1]/file"));
    }

    #[test]
    fn test_url_ipv6_with_zone_id() {
        assert!(URL_IPV6_REGEX.is_match("https://[fe80::1%eth0]/path"));
        assert!(URL_IPV6_REGEX.is_match("http://[fe80::1%25eth0]:8080/"));
    }

    // ----- URL_SIMPLE_REGEX / URL_QUICK_CHECK_REGEX ------------------------

    #[test]
    fn test_url_simple_detection() {
        assert!(URL_SIMPLE_REGEX.is_match("https://example.com"));
        assert!(URL_SIMPLE_REGEX.is_match("http://test.org/path"));
        assert!(URL_SIMPLE_REGEX.is_match("ftp://files.com/file.zip"));
        assert!(!URL_SIMPLE_REGEX.is_match("not a url"));
    }

    #[test]
    fn test_url_quick_check() {
        assert!(URL_QUICK_CHECK_REGEX.is_match("https://example.com"));
        assert!(URL_QUICK_CHECK_REGEX.is_match("http://example.com"));
        assert!(URL_QUICK_CHECK_REGEX.is_match("ftp://files.com"));
        assert!(URL_QUICK_CHECK_REGEX.is_match("www.example.com"));
        assert!(URL_QUICK_CHECK_REGEX.is_match("user@example.com"));
        assert!(!URL_QUICK_CHECK_REGEX.is_match("just plain text"));
    }

    // ----- URL edge cases --------------------------------------------------

    #[test]
    fn test_url_edge_cases() {
        let url = "https://example.com/path";
        assert!(URL_STANDARD_REGEX.is_match(url));

        // The host character class does not exclude `,`, so the raw match keeps it.
        let text = "Check https://example.com, it's great!";
        let cap = URL_STANDARD_REGEX.find(text).unwrap();
        assert!(cap.as_str().ends_with(','));

        let text2 = "See <https://example.com> for more";
        assert!(URL_STANDARD_REGEX.is_match(text2));

        // Angle brackets are excluded from every component of the pattern.
        let cap2 = URL_STANDARD_REGEX.find(text2).unwrap();
        assert!(!cap2.as_str().contains('>'));
    }

    #[test]
    fn test_url_with_complex_paths() {
        let urls = [
            "https://github.com/owner/repo/blob/main/src/file.rs#L123",
            "https://docs.example.com/api/v2/endpoint?format=json&page=1",
            "https://cdn.example.com/assets/images/logo.png?v=2023",
            "https://search.example.com/results?q=test+query&filter=all",
        ];

        for url in urls {
            assert!(URL_STANDARD_REGEX.is_match(url), "Should match: {url}");
        }
    }

    #[test]
    fn test_url_pattern_strings_are_valid() {
        // Forcing each static also proves that its pattern string compiles.
        assert!(URL_STANDARD_REGEX.is_match("https://example.com"));
        assert!(URL_WWW_REGEX.is_match("www.example.com"));
        assert!(URL_IPV6_REGEX.is_match("https://[::1]/"));
        assert!(URL_QUICK_CHECK_REGEX.is_match("https://example.com"));
        assert!(URL_SIMPLE_REGEX.is_match("https://example.com"));
    }

    // ----- is_blank_in_blockquote_context ----------------------------------

    #[test]
    fn test_is_blank_in_blockquote_context_regular_blanks() {
        assert!(is_blank_in_blockquote_context(""));
        assert!(is_blank_in_blockquote_context(" "));
        assert!(is_blank_in_blockquote_context("\t"));
        assert!(is_blank_in_blockquote_context(" \t "));
    }

    #[test]
    fn test_is_blank_in_blockquote_context_blockquote_blanks() {
        assert!(is_blank_in_blockquote_context(">"));
        assert!(is_blank_in_blockquote_context("> "));
        assert!(is_blank_in_blockquote_context("> "));
        assert!(is_blank_in_blockquote_context(">>"));
        assert!(is_blank_in_blockquote_context(">> "));
        assert!(is_blank_in_blockquote_context(">>>"));
        assert!(is_blank_in_blockquote_context(">>> "));
    }

    #[test]
    fn test_is_blank_in_blockquote_context_spaced_nested() {
        assert!(is_blank_in_blockquote_context("> > "));
        assert!(is_blank_in_blockquote_context("> > > "));
        assert!(is_blank_in_blockquote_context("> > "));
    }

    #[test]
    fn test_is_blank_in_blockquote_context_with_leading_space() {
        assert!(is_blank_in_blockquote_context(" >"));
        assert!(is_blank_in_blockquote_context(" > "));
        assert!(is_blank_in_blockquote_context(" >>"));
    }

    #[test]
    fn test_is_blank_in_blockquote_context_not_blank() {
        assert!(!is_blank_in_blockquote_context("text"));
        assert!(!is_blank_in_blockquote_context("> text"));
        assert!(!is_blank_in_blockquote_context(">> text"));
        assert!(!is_blank_in_blockquote_context("> | table |"));
        assert!(!is_blank_in_blockquote_context("| table |"));
        assert!(!is_blank_in_blockquote_context("> # Heading"));
        assert!(!is_blank_in_blockquote_context(">text"));
    }

    #[test]
    fn test_is_blank_in_blockquote_context_edge_cases() {
        assert!(!is_blank_in_blockquote_context(">a"));
        assert!(!is_blank_in_blockquote_context("> a"));
        assert!(is_blank_in_blockquote_context("> "));
        assert!(!is_blank_in_blockquote_context("> text"));
    }
}