rumdl_lib/utils/
regex_cache.rs1use fancy_regex::Regex as FancyRegex;
23use lazy_static::lazy_static;
24use regex::Regex;
25use std::collections::HashMap;
26use std::sync::{Arc, Mutex};
27
28#[derive(Debug)]
30pub struct RegexCache {
31 cache: HashMap<String, Arc<Regex>>,
32 fancy_cache: HashMap<String, Arc<FancyRegex>>,
33 usage_stats: HashMap<String, u64>,
34}
35
36impl Default for RegexCache {
37 fn default() -> Self {
38 Self::new()
39 }
40}
41
42impl RegexCache {
43 pub fn new() -> Self {
44 Self {
45 cache: HashMap::new(),
46 fancy_cache: HashMap::new(),
47 usage_stats: HashMap::new(),
48 }
49 }
50
51 pub fn get_regex(&mut self, pattern: &str) -> Result<Arc<Regex>, regex::Error> {
53 if let Some(regex) = self.cache.get(pattern) {
54 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
55 return Ok(regex.clone());
56 }
57
58 let regex = Arc::new(Regex::new(pattern)?);
59 self.cache.insert(pattern.to_string(), regex.clone());
60 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
61 Ok(regex)
62 }
63
64 pub fn get_fancy_regex(&mut self, pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
66 if let Some(regex) = self.fancy_cache.get(pattern) {
67 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
68 return Ok(regex.clone());
69 }
70
71 match FancyRegex::new(pattern) {
72 Ok(regex) => {
73 let arc_regex = Arc::new(regex);
74 self.fancy_cache.insert(pattern.to_string(), arc_regex.clone());
75 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
76 Ok(arc_regex)
77 }
78 Err(e) => Err(Box::new(e)),
79 }
80 }
81
82 pub fn get_stats(&self) -> HashMap<String, u64> {
84 self.usage_stats.clone()
85 }
86
87 pub fn clear(&mut self) {
89 self.cache.clear();
90 self.fancy_cache.clear();
91 self.usage_stats.clear();
92 }
93}
94
95lazy_static! {
96 static ref GLOBAL_REGEX_CACHE: Arc<Mutex<RegexCache>> = Arc::new(Mutex::new(RegexCache::new()));
98}
99
100pub fn get_cached_regex(pattern: &str) -> Result<Arc<Regex>, regex::Error> {
102 let mut cache = GLOBAL_REGEX_CACHE.lock().unwrap();
103 cache.get_regex(pattern)
104}
105
106pub fn get_cached_fancy_regex(pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
108 let mut cache = GLOBAL_REGEX_CACHE.lock().unwrap();
109 cache.get_fancy_regex(pattern)
110}
111
112pub fn get_cache_stats() -> HashMap<String, u64> {
114 let cache = GLOBAL_REGEX_CACHE.lock().unwrap();
115 cache.get_stats()
116}
117
/// Expands to a block that declares a `lazy_static` `regex::Regex` compiled from the given
/// pattern and evaluates to a reference to it.
///
/// Each expansion declares its own static, so every call site compiles its pattern at most
/// once. The expansion names `lazy_static` and `regex` by crate path, so the calling crate
/// must have both available.
///
/// # Panics
///
/// Panics on first use if the pattern does not compile.
#[macro_export]
macro_rules! regex_lazy {
    ($pat:expr) => {{
        lazy_static::lazy_static! {
            static ref REGEX: regex::Regex = regex::Regex::new($pat).unwrap();
        }
        &*REGEX
    }};
}
135
/// Looks the pattern up in the global cache via `get_cached_regex` and evaluates to the
/// resulting `Arc<Regex>`.
///
/// # Panics
///
/// Panics with "Failed to compile regex" if the pattern does not compile.
#[macro_export]
macro_rules! regex_cached {
    ($pat:expr) => {{
        $crate::utils::regex_cache::get_cached_regex($pat).expect("Failed to compile regex")
    }};
}
141
/// Looks the pattern up in the global cache via `get_cached_fancy_regex` and evaluates to
/// the resulting `Arc<FancyRegex>`.
///
/// # Panics
///
/// Panics with "Failed to compile fancy regex" if the pattern does not compile.
#[macro_export]
macro_rules! fancy_regex_cached {
    ($pat:expr) => {{
        $crate::utils::regex_cache::get_cached_fancy_regex($pat)
            .expect("Failed to compile fancy regex")
    }};
}
147
148pub use crate::regex_lazy;
150
151lazy_static! {
152 pub static ref URL_REGEX: Regex = Regex::new(r#"(?:https?|ftp)://[^\s<>\[\]()'"]+[^\s<>\[\]()"'.,]"#).unwrap();
154 pub static ref BARE_URL_REGEX: Regex = Regex::new(r"(?:https?|ftp)://[^\s<>]+[^\s<>.]").unwrap();
155 pub static ref URL_PATTERN: Regex = Regex::new(r"((?:https?|ftp)://[^\s\)<>]+[^\s\)<>.,])").unwrap();
156
157 pub static ref ATX_HEADING_REGEX: Regex = Regex::new(r"^(\s*)(#{1,6})(\s+|$)").unwrap();
159 pub static ref CLOSED_ATX_HEADING_REGEX: Regex = Regex::new(r"^(\s*)(#{1,6})(\s+)(.*)(\s+)(#+)(\s*)$").unwrap();
160 pub static ref SETEXT_HEADING_REGEX: Regex = Regex::new(r"^(\s*)[^\s]+.*\n(\s*)(=+|-+)\s*$").unwrap();
161 pub static ref TRAILING_PUNCTUATION_REGEX: Regex = Regex::new(r"[.,:;!?]$").unwrap();
162
163 pub static ref ATX_HEADING_WITH_CAPTURE: Regex = Regex::new(r"^(#{1,6})\s+(.+?)(?:\s+#*\s*)?$").unwrap();
165 pub static ref SETEXT_HEADING_WITH_CAPTURE: FancyRegex = FancyRegex::new(r"^([^\n]+)\n([=\-])\2+\s*$").unwrap();
166
167 pub static ref UNORDERED_LIST_MARKER_REGEX: Regex = Regex::new(r"^(\s*)([*+-])(\s+)").unwrap();
169 pub static ref ORDERED_LIST_MARKER_REGEX: Regex = Regex::new(r"^(\s*)(\d+)([.)])(\s+)").unwrap();
170 pub static ref LIST_MARKER_ANY_REGEX: Regex = Regex::new(r"^(\s*)(?:([*+-])|(\d+)[.)])(\s+)").unwrap();
171
172 pub static ref FENCED_CODE_BLOCK_START_REGEX: Regex = Regex::new(r"^(\s*)(```|~~~)(.*)$").unwrap();
174 pub static ref FENCED_CODE_BLOCK_END_REGEX: Regex = Regex::new(r"^(\s*)(```|~~~)(\s*)$").unwrap();
175 pub static ref INDENTED_CODE_BLOCK_REGEX: Regex = Regex::new(r"^(\s{4,})(.*)$").unwrap();
176 pub static ref CODE_FENCE_REGEX: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();
177
178 pub static ref EMPHASIS_REGEX: FancyRegex = FancyRegex::new(r"(\s|^)(\*{1,2}|_{1,2})(?=\S)(.+?)(?<=\S)(\2)(\s|$)").unwrap();
180 pub static ref SPACE_IN_EMPHASIS_REGEX: FancyRegex = FancyRegex::new(r"(\*|_)(\s+)(.+?)(\s+)(\1)").unwrap();
181
182 pub static ref ASTERISK_EMPHASIS: Regex = Regex::new(r"(?:^|[^*])\*(\s+[^*]+\s*|\s*[^*]+\s+)\*(?:[^*]|$)").unwrap();
186 pub static ref UNDERSCORE_EMPHASIS: Regex = Regex::new(r"(?:^|[^_])_(\s+[^_]+\s*|\s*[^_]+\s+)_(?:[^_]|$)").unwrap();
187 pub static ref DOUBLE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"(?:^|[^_])__(\s+[^_]+\s*|\s*[^_]+\s+)__(?:[^_]|$)").unwrap();
188 pub static ref DOUBLE_ASTERISK_EMPHASIS: FancyRegex = FancyRegex::new(r"\*\*\s+([^*]+?)\s+\*\*").unwrap();
189 pub static ref DOUBLE_ASTERISK_SPACE_START: FancyRegex = FancyRegex::new(r"\*\*\s+([^*]+?)\*\*").unwrap();
190 pub static ref DOUBLE_ASTERISK_SPACE_END: FancyRegex = FancyRegex::new(r"\*\*([^*]+?)\s+\*\*").unwrap();
191
192 pub static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap();
194 pub static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)```\s*$").unwrap();
195 pub static ref ALTERNATE_FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap();
196 pub static ref ALTERNATE_FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)~~~\s*$").unwrap();
197 pub static ref INDENTED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(\s{4,})").unwrap();
198
199 pub static ref HTML_TAG_REGEX: Regex = Regex::new(r"<([a-zA-Z][^>]*)>").unwrap();
201 pub static ref HTML_SELF_CLOSING_TAG_REGEX: Regex = Regex::new(r"<([a-zA-Z][^>]*/)>").unwrap();
202 pub static ref HTML_TAG_FINDER: Regex = Regex::new("(?i)</?[a-zA-Z][^>]*>").unwrap();
203 pub static ref HTML_OPENING_TAG_FINDER: Regex = Regex::new("(?i)<[a-zA-Z][^>]*>").unwrap();
204 pub static ref HTML_TAG_QUICK_CHECK: Regex = Regex::new("(?i)</?[a-zA-Z]").unwrap();
205
206 pub static ref LINK_REFERENCE_DEFINITION_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();
208 pub static ref INLINE_LINK_REGEX: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
209 pub static ref LINK_TEXT_REGEX: Regex = Regex::new(r"\[([^\]]*)\]").unwrap();
210 pub static ref LINK_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[([^\]]*)\]\(([^)#]*)#([^)]+)\)").unwrap();
211 pub static ref EXTERNAL_URL_REGEX: FancyRegex = FancyRegex::new(r"^(https?://|ftp://|www\.|[^/]+\.[a-z]{2,})").unwrap();
212
213 pub static ref IMAGE_REGEX: Regex = Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap();
215
216 pub static ref TRAILING_WHITESPACE_REGEX: Regex = Regex::new(r"\s+$").unwrap();
218 pub static ref MULTIPLE_BLANK_LINES_REGEX: Regex = Regex::new(r"\n{3,}").unwrap();
219
220 pub static ref FRONT_MATTER_REGEX: Regex = Regex::new(r"^---\n.*?\n---\n").unwrap();
222
223 pub static ref INLINE_CODE_REGEX: FancyRegex = FancyRegex::new(r"`[^`]+`").unwrap();
225 pub static ref BOLD_ASTERISK_REGEX: Regex = Regex::new(r"\*\*(.+?)\*\*").unwrap();
226 pub static ref BOLD_UNDERSCORE_REGEX: Regex = Regex::new(r"__(.+?)__").unwrap();
227 pub static ref ITALIC_ASTERISK_REGEX: Regex = Regex::new(r"\*([^*]+?)\*").unwrap();
228 pub static ref ITALIC_UNDERSCORE_REGEX: Regex = Regex::new(r"_([^_]+?)_").unwrap();
229 pub static ref LINK_TEXT_FULL_REGEX: FancyRegex = FancyRegex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap();
230 pub static ref STRIKETHROUGH_REGEX: Regex = Regex::new(r"~~(.+?)~~").unwrap();
231 pub static ref MULTIPLE_HYPHENS: Regex = Regex::new(r"-{2,}").unwrap();
232 pub static ref TOC_SECTION_START: Regex = Regex::new(r"^#+\s*(?:Table of Contents|Contents|TOC)\s*$").unwrap();
233
234 pub static ref BLOCKQUOTE_PREFIX_RE: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
236
237 pub static ref IMAGE_REF_PATTERN: Regex = Regex::new(r"^!\[.*?\]\[.*?\]$").unwrap();
239 pub static ref LINK_REF_PATTERN: Regex = Regex::new(r"^\[.*?\]:\s*https?://\S+$").unwrap();
240 pub static ref URL_IN_TEXT: Regex = Regex::new(r"https?://\S+").unwrap();
241 pub static ref SENTENCE_END: Regex = Regex::new(r"[.!?]\s+[A-Z]").unwrap();
242 pub static ref ABBREVIATION: Regex = Regex::new(r"\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|Inc|Corp|Ltd|Co|St|Ave|Blvd|Rd|Ph\.D|M\.D|B\.A|M\.A|Ph\.D|U\.S|U\.K|U\.N|N\.Y|L\.A|D\.C)\.\s+[A-Z]").unwrap();
243 pub static ref DECIMAL_NUMBER: Regex = Regex::new(r"\d+\.\s*\d+").unwrap();
244 pub static ref LIST_ITEM: Regex = Regex::new(r"^\s*\d+\.\s+").unwrap();
245 pub static ref REFERENCE_LINK: Regex = Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap();
246
247 pub static ref EMAIL_PATTERN: Regex = Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap();
249}
250
251lazy_static! {
253 pub static ref REF_LINK_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap();
256
257 pub static ref SHORTCUT_REF_REGEX: FancyRegex = FancyRegex::new(r"(?<![\\)\]])\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\](?!\s*[\[\(])").unwrap();
262
263 pub static ref INLINE_LINK_FANCY_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[([^\]]+)\]\(([^)]+)\)").unwrap();
265
266 pub static ref INLINE_IMAGE_FANCY_REGEX: FancyRegex = FancyRegex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap();
268
269 pub static ref REF_IMAGE_REGEX: FancyRegex = FancyRegex::new(r"!\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap();
271
272 pub static ref FOOTNOTE_REF_REGEX: FancyRegex = FancyRegex::new(r"\[\^([^\]]+)\]").unwrap();
274
275 pub static ref STRIKETHROUGH_FANCY_REGEX: FancyRegex = FancyRegex::new(r"~~([^~]+)~~").unwrap();
277
278 pub static ref WIKI_LINK_REGEX: FancyRegex = FancyRegex::new(r"\[\[([^\]]+)\]\]").unwrap();
280
281 pub static ref INLINE_MATH_REGEX: FancyRegex = FancyRegex::new(r"(?<!\$)\$(?!\$)([^\$]+)\$(?!\$)").unwrap();
283 pub static ref DISPLAY_MATH_REGEX: FancyRegex = FancyRegex::new(r"\$\$([^\$]+)\$\$").unwrap();
284
285 pub static ref EMOJI_SHORTCODE_REGEX: FancyRegex = FancyRegex::new(r":([a-zA-Z0-9_+-]+):").unwrap();
287
288 pub static ref HTML_TAG_PATTERN: FancyRegex = FancyRegex::new(r"</?[a-zA-Z][^>]*>|<[a-zA-Z][^>]*/\s*>").unwrap();
290
291 pub static ref HTML_ENTITY_REGEX: FancyRegex = FancyRegex::new(r"&[a-zA-Z][a-zA-Z0-9]*;|&#\d+;|&#x[0-9a-fA-F]+;").unwrap();
293}
294
295lazy_static! {
297 pub static ref HTML_COMMENT_START: Regex = Regex::new(r"<!--").unwrap();
299 pub static ref HTML_COMMENT_END: Regex = Regex::new(r"-->").unwrap();
300 pub static ref HTML_COMMENT_PATTERN: Regex = Regex::new(r"<!--[\s\S]*?-->").unwrap();
301
302 pub static ref HTML_HEADING_PATTERN: FancyRegex = FancyRegex::new(r"^\s*<h([1-6])(?:\s[^>]*)?>.*</h\1>\s*$").unwrap();
304
305 pub static ref HEADING_CHECK: Regex = Regex::new(r"(?m)^(?:\s*)#").unwrap();
307
308 pub static ref HR_DASH: Regex = Regex::new(r"^\-{3,}\s*$").unwrap();
310 pub static ref HR_ASTERISK: Regex = Regex::new(r"^\*{3,}\s*$").unwrap();
311 pub static ref HR_UNDERSCORE: Regex = Regex::new(r"^_{3,}\s*$").unwrap();
312 pub static ref HR_SPACED_DASH: Regex = Regex::new(r"^(\-\s+){2,}\-\s*$").unwrap();
313 pub static ref HR_SPACED_ASTERISK: Regex = Regex::new(r"^(\*\s+){2,}\*\s*$").unwrap();
314 pub static ref HR_SPACED_UNDERSCORE: Regex = Regex::new(r"^(_\s+){2,}_\s*$").unwrap();
315}
316
/// Cheap pre-check: returns `true` when `content` contains a `#` anywhere.
///
/// This only tests for the character; it does not verify that a heading is present.
pub fn has_heading_markers(content: &str) -> bool {
    content.chars().any(|ch| ch == '#')
}
322
/// Cheap pre-check for list syntax.
///
/// Returns `true` when `content` contains any of `*`, `-`, `+`, or contains both a `.` and
/// an ASCII digit (in any positions). It does not verify that a list is present.
pub fn has_list_markers(content: &str) -> bool {
    let has_bullet = content.chars().any(|ch| matches!(ch, '*' | '-' | '+'));
    if has_bullet {
        return true;
    }

    let has_dot = content.contains('.');
    has_dot && content.chars().any(|ch| ch.is_ascii_digit())
}
330
/// Cheap pre-check for code-block syntax.
///
/// Returns `true` when `content` contains a backtick fence, a tilde fence, or a line break
/// followed by a space.
pub fn has_code_block_markers(content: &str) -> bool {
    // NOTE(review): the last needle is a newline plus ONE space, so any line starting with a
    // space passes. It looks whitespace-collapsed (indented code needs four spaces) — confirm
    // against the intended source before tightening it.
    const NEEDLES: [&str; 3] = ["```", "~~~", "\n "];
    NEEDLES.iter().any(|needle| content.contains(*needle))
}
336
/// Cheap pre-check: returns `true` when `content` contains a `*` or a `_` anywhere.
///
/// It does not verify that the characters form valid emphasis.
pub fn has_emphasis_markers(content: &str) -> bool {
    content.contains(|ch: char| matches!(ch, '*' | '_'))
}
341
/// Cheap pre-check for HTML: returns `true` when `content` contains both a `<` and a `>`.
///
/// The two characters may appear in any order and need not form a tag; this is only a
/// filter to skip the HTML regexes on text that cannot match.
pub fn has_html_tags(content: &str) -> bool {
    // A separate test for "/>" would be dead code: any text containing "/>" already
    // contains '>'.
    content.contains('<') && content.contains('>')
}
346
/// Cheap pre-check for links.
///
/// Returns `true` when `content` contains both `[` and `]` (in any order), or contains one
/// of the scheme prefixes `http://`, `https://` or `ftp://`.
pub fn has_link_markers(content: &str) -> bool {
    const SCHEMES: [&str; 3] = ["http://", "https://", "ftp://"];

    let has_brackets = content.contains('[') && content.contains(']');
    has_brackets || SCHEMES.iter().any(|scheme| content.contains(*scheme))
}
354
/// Cheap pre-check: returns `true` when `content` contains the two-character sequence `![`.
pub fn has_image_markers(content: &str) -> bool {
    content.find("![").is_some()
}
359
/// Heuristic URL detector that avoids running a regex.
///
/// Returns `true` when `content` contains the three characters `htt` or `ftp` and the
/// sequence `://` starts within the eleven character positions beginning at that prefix.
/// Text without any `://` is rejected immediately.
pub fn contains_url(content: &str) -> bool {
    // How many start positions (counted from the prefix itself) are searched for "://".
    const SCAN_POSITIONS: usize = 11;

    if !content.contains("://") {
        return false;
    }

    // Distances are measured in characters, not bytes, so work on a char buffer.
    let chars: Vec<char> = content.chars().collect();

    chars.windows(3).enumerate().any(|(start, prefix)| {
        let is_scheme_prefix = matches!(prefix, ['h', 't', 't'] | ['f', 't', 'p']);
        is_scheme_prefix
            && chars[start..]
                .windows(3)
                .take(SCAN_POSITIONS)
                .any(|triple| matches!(triple, [':', '/', '/']))
    })
}
396
/// Returns a copy of `s` with a backslash inserted before each of the characters
/// `. + * ? ^ $ ( ) [ ] { } | \`.
///
/// All other characters, including `-` and `/`, are copied through unchanged.
pub fn escape_regex(s: &str) -> String {
    // Worst case every character gains a backslash, so reserve twice the input length.
    let mut escaped = String::with_capacity(s.len() * 2);

    for ch in s.chars() {
        let needs_escape = matches!(
            ch,
            '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '\\'
        );
        if needs_escape {
            escaped.push('\\');
        }
        escaped.push(ch);
    }

    escaped
}
411
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_regex_cache_new() {
        let cache = RegexCache::new();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_regex_cache_default() {
        let cache = RegexCache::default();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_get_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles and caches the pattern.
        let regex1 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&1));

        // Second request is served from the cache.
        let regex2 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_regex(r"[unterminated");
        assert!(result.is_err());
        assert!(cache.cache.is_empty());
    }

    #[test]
    fn test_get_fancy_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles and caches the pattern.
        let regex1 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&1));

        // Second request is served from the cache.
        let regex2 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_fancy_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_fancy_regex(r"(?<=invalid");
        assert!(result.is_err());
        assert!(cache.fancy_cache.is_empty());
    }

    #[test]
    fn test_get_stats() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\w+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        let stats = cache.get_stats();
        assert_eq!(stats.get(r"\d+"), Some(&2));
        assert_eq!(stats.get(r"\w+"), Some(&1));
        assert_eq!(stats.get(r"(?<=foo)bar"), Some(&1));
    }

    #[test]
    fn test_clear_cache() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        assert!(!cache.cache.is_empty());
        assert!(!cache.fancy_cache.is_empty());
        assert!(!cache.usage_stats.is_empty());

        cache.clear();

        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_global_cache_functions() {
        let regex1 = get_cached_regex(r"\d{3}").unwrap();
        let regex2 = get_cached_regex(r"\d{3}").unwrap();
        assert!(Arc::ptr_eq(&regex1, &regex2));

        let fancy1 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        let fancy2 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        assert!(Arc::ptr_eq(&fancy1, &fancy2));

        // The global cache is shared with other tests, so only check key presence.
        let stats = get_cache_stats();
        assert!(stats.contains_key(r"\d{3}"));
        assert!(stats.contains_key(r"(?<=test)ing"));
    }

    #[test]
    fn test_regex_lazy_macro() {
        let re = regex_lazy!(r"^test.*end$");
        assert!(re.is_match("test something end"));
        assert!(!re.is_match("test something"));

        // A second expansion declares its own independent static.
        let re2 = regex_lazy!(r"^start.*finish$");
        assert!(re2.is_match("start and finish"));
        assert!(!re2.is_match("start without end"));
    }

    #[test]
    fn test_has_heading_markers() {
        assert!(has_heading_markers("# Heading"));
        assert!(has_heading_markers("Text with # symbol"));
        assert!(!has_heading_markers("Text without heading marker"));
    }

    #[test]
    fn test_has_list_markers() {
        assert!(has_list_markers("* Item"));
        assert!(has_list_markers("- Item"));
        assert!(has_list_markers("+ Item"));
        assert!(has_list_markers("1. Item"));
        assert!(!has_list_markers("Text without list markers"));
    }

    #[test]
    fn test_has_code_block_markers() {
        assert!(has_code_block_markers("```code```"));
        assert!(has_code_block_markers("~~~code~~~"));
        // Four-space indentation, as required for an indented code block.
        assert!(has_code_block_markers("Text\n    indented code"));
        assert!(!has_code_block_markers("Text without code blocks"));
    }

    #[test]
    fn test_has_emphasis_markers() {
        assert!(has_emphasis_markers("*emphasis*"));
        assert!(has_emphasis_markers("_emphasis_"));
        assert!(has_emphasis_markers("**bold**"));
        assert!(has_emphasis_markers("__bold__"));
        assert!(!has_emphasis_markers("no emphasis"));
    }

    #[test]
    fn test_has_html_tags() {
        assert!(has_html_tags("<div>content</div>"));
        assert!(has_html_tags("<br/>"));
        assert!(has_html_tags("<img src='test.jpg'>"));
        assert!(!has_html_tags("no html tags"));
        assert!(!has_html_tags("less than < but no tag"));
    }

    #[test]
    fn test_has_link_markers() {
        assert!(has_link_markers("[text](url)"));
        assert!(has_link_markers("[reference][1]"));
        assert!(has_link_markers("http://example.com"));
        assert!(has_link_markers("https://example.com"));
        assert!(has_link_markers("ftp://example.com"));
        assert!(!has_link_markers("no links here"));
    }

    #[test]
    fn test_has_image_markers() {
        // Positive cases must contain the `![` marker, with and without alt text.
        assert!(has_image_markers("![alt text](image.png)"));
        assert!(has_image_markers("![](image.png)"));
        assert!(!has_image_markers("[link](url)"));
        assert!(!has_image_markers("no images"));
    }

    #[test]
    fn test_contains_url() {
        assert!(contains_url("http://example.com"));
        assert!(contains_url("Text with https://example.com link"));
        assert!(contains_url("ftp://example.com"));
        assert!(!contains_url("Text without URL"));
        assert!(!contains_url("http not followed by ://"));

        // Edge cases
        assert!(!contains_url("http"));
        assert!(!contains_url("https"));
        assert!(!contains_url("://"));
        assert!(contains_url("Visit http://site.com now"));
        assert!(contains_url("See https://secure.site.com/path"));
    }

    #[test]
    fn test_contains_url_performance() {
        // Long text without "://" takes the early-exit path.
        let long_text = "a".repeat(10000);
        assert!(!contains_url(&long_text));

        // A URL at the very end is still found.
        let text_with_url = format!("{long_text}https://example.com");
        assert!(contains_url(&text_with_url));
    }

    #[test]
    fn test_escape_regex() {
        assert_eq!(escape_regex("a.b"), "a\\.b");
        assert_eq!(escape_regex("a+b*c"), "a\\+b\\*c");
        assert_eq!(escape_regex("(test)"), "\\(test\\)");
        assert_eq!(escape_regex("[a-z]"), "\\[a-z\\]");
        assert_eq!(escape_regex("normal text"), "normal text");

        // Every special character at once
        assert_eq!(escape_regex(".$^{[(|)*+?\\"), "\\.\\$\\^\\{\\[\\(\\|\\)\\*\\+\\?\\\\");

        // Empty input
        assert_eq!(escape_regex(""), "");

        // Mixed content
        assert_eq!(escape_regex("test.com/path?query=1"), "test\\.com/path\\?query=1");
    }

    #[test]
    fn test_static_regex_patterns() {
        // URLs
        assert!(URL_REGEX.is_match("https://example.com"));
        assert!(URL_REGEX.is_match("http://test.org/path"));
        assert!(URL_REGEX.is_match("ftp://files.com"));
        assert!(!URL_REGEX.is_match("not a url"));

        // Headings
        assert!(ATX_HEADING_REGEX.is_match("# Heading"));
        assert!(ATX_HEADING_REGEX.is_match(" ## Indented"));
        assert!(ATX_HEADING_REGEX.is_match("### "));
        assert!(!ATX_HEADING_REGEX.is_match("Not a heading"));

        // List markers
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("* Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("- Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("+ Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("1. Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("99. Item"));

        // Code fences
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```rust"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("~~~"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("~~~"));

        // Inline formatting
        assert!(BOLD_ASTERISK_REGEX.is_match("**bold**"));
        assert!(BOLD_UNDERSCORE_REGEX.is_match("__bold__"));
        assert!(ITALIC_ASTERISK_REGEX.is_match("*italic*"));
        assert!(ITALIC_UNDERSCORE_REGEX.is_match("_italic_"));

        // HTML
        assert!(HTML_TAG_REGEX.is_match("<div>"));
        assert!(HTML_TAG_REGEX.is_match("<span class='test'>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<br/>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<img src='test'/>"));

        // Whitespace
        assert!(TRAILING_WHITESPACE_REGEX.is_match("line with spaces "));
        assert!(TRAILING_WHITESPACE_REGEX.is_match("tabs\t\t"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n\n"));

        // Blockquotes
        assert!(BLOCKQUOTE_PREFIX_RE.is_match("> Quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(" > Indented quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(">> Nested"));
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        // Each thread requests a distinct pattern (`\d{0}` .. `\d{9}`) from the global cache.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                thread::spawn(move || {
                    let pattern = format!(r"\d{{{i}}}");
                    let regex = get_cached_regex(&pattern).unwrap();
                    assert!(regex.is_match(&"1".repeat(i)));
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }
    }
}