rumdl_lib/utils/
regex_cache.rs1use fancy_regex::Regex as FancyRegex;
23use lazy_static::lazy_static;
24use regex::Regex;
25use std::collections::HashMap;
26use std::sync::{Arc, Mutex};
27
28#[derive(Debug)]
30pub struct RegexCache {
31 cache: HashMap<String, Arc<Regex>>,
32 fancy_cache: HashMap<String, Arc<FancyRegex>>,
33 usage_stats: HashMap<String, u64>,
34}
35
36impl Default for RegexCache {
37 fn default() -> Self {
38 Self::new()
39 }
40}
41
42impl RegexCache {
43 pub fn new() -> Self {
44 Self {
45 cache: HashMap::new(),
46 fancy_cache: HashMap::new(),
47 usage_stats: HashMap::new(),
48 }
49 }
50
51 pub fn get_regex(&mut self, pattern: &str) -> Result<Arc<Regex>, regex::Error> {
53 if let Some(regex) = self.cache.get(pattern) {
54 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
55 return Ok(regex.clone());
56 }
57
58 let regex = Arc::new(Regex::new(pattern)?);
59 self.cache.insert(pattern.to_string(), regex.clone());
60 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
61 Ok(regex)
62 }
63
64 pub fn get_fancy_regex(&mut self, pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
66 if let Some(regex) = self.fancy_cache.get(pattern) {
67 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
68 return Ok(regex.clone());
69 }
70
71 match FancyRegex::new(pattern) {
72 Ok(regex) => {
73 let arc_regex = Arc::new(regex);
74 self.fancy_cache.insert(pattern.to_string(), arc_regex.clone());
75 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
76 Ok(arc_regex)
77 }
78 Err(e) => Err(Box::new(e)),
79 }
80 }
81
82 pub fn get_stats(&self) -> HashMap<String, u64> {
84 self.usage_stats.clone()
85 }
86
87 pub fn clear(&mut self) {
89 self.cache.clear();
90 self.fancy_cache.clear();
91 self.usage_stats.clear();
92 }
93}
94
95lazy_static! {
96 static ref GLOBAL_REGEX_CACHE: Arc<Mutex<RegexCache>> = Arc::new(Mutex::new(RegexCache::new()));
98}
99
100pub fn get_cached_regex(pattern: &str) -> Result<Arc<Regex>, regex::Error> {
102 let mut cache = GLOBAL_REGEX_CACHE.lock().unwrap();
103 cache.get_regex(pattern)
104}
105
106pub fn get_cached_fancy_regex(pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
108 let mut cache = GLOBAL_REGEX_CACHE.lock().unwrap();
109 cache.get_fancy_regex(pattern)
110}
111
112pub fn get_cache_stats() -> HashMap<String, u64> {
114 let cache = GLOBAL_REGEX_CACHE.lock().unwrap();
115 cache.get_stats()
116}
117
/// Expand to a reference to a `regex::Regex` that is compiled the first
/// time the expansion site is evaluated and reused afterwards.
///
/// Every expansion declares its own block-scoped `lazy_static` item, so
/// distinct call sites never share a regex.
///
/// # Panics
/// Panics on first use if the pattern fails to compile.
#[macro_export]
macro_rules! regex_lazy {
    ($re_source:expr) => {{
        lazy_static::lazy_static! {
            static ref REGEX: regex::Regex = regex::Regex::new($re_source).unwrap();
        }
        &*REGEX
    }};
}
135
/// Expand to the result of looking the pattern up through
/// `get_cached_regex`.
///
/// # Panics
/// Panics with "Failed to compile regex" if the lookup returns an error.
#[macro_export]
macro_rules! regex_cached {
    ($re_source:expr) => {{
        $crate::utils::regex_cache::get_cached_regex($re_source).expect("Failed to compile regex")
    }};
}
141
/// Expand to the result of looking the pattern up through
/// `get_cached_fancy_regex`.
///
/// # Panics
/// Panics with "Failed to compile fancy regex" if the lookup returns an
/// error.
#[macro_export]
macro_rules! fancy_regex_cached {
    ($re_source:expr) => {{
        $crate::utils::regex_cache::get_cached_fancy_regex($re_source).expect("Failed to compile fancy regex")
    }};
}
147
148pub use crate::regex_lazy;
150
151lazy_static! {
152 pub static ref URL_REGEX: Regex = Regex::new(r#"(?:https?|ftp)://[^\s<>\[\]()'"]+[^\s<>\[\]()"'.,]"#).unwrap();
154 pub static ref BARE_URL_REGEX: Regex = Regex::new(r"(?:https?|ftp)://[^\s<>]+[^\s<>.]").unwrap();
155 pub static ref URL_PATTERN: Regex = Regex::new(r"((?:https?|ftp)://[^\s\)<>]+[^\s\)<>.,])").unwrap();
156
157 pub static ref ATX_HEADING_REGEX: Regex = Regex::new(r"^(\s*)(#{1,6})(\s+|$)").unwrap();
159 pub static ref CLOSED_ATX_HEADING_REGEX: Regex = Regex::new(r"^(\s*)(#{1,6})(\s+)(.*)(\s+)(#+)(\s*)$").unwrap();
160 pub static ref SETEXT_HEADING_REGEX: Regex = Regex::new(r"^(\s*)[^\s]+.*\n(\s*)(=+|-+)\s*$").unwrap();
161 pub static ref TRAILING_PUNCTUATION_REGEX: Regex = Regex::new(r"[.,:;!?]$").unwrap();
162
163 pub static ref ATX_HEADING_WITH_CAPTURE: Regex = Regex::new(r"^(#{1,6})\s+(.+?)(?:\s+#*\s*)?$").unwrap();
165 pub static ref SETEXT_HEADING_WITH_CAPTURE: FancyRegex = FancyRegex::new(r"^([^\n]+)\n([=\-])\2+\s*$").unwrap();
166
167 pub static ref UNORDERED_LIST_MARKER_REGEX: Regex = Regex::new(r"^(\s*)([*+-])(\s+)").unwrap();
169 pub static ref ORDERED_LIST_MARKER_REGEX: Regex = Regex::new(r"^(\s*)(\d+)([.)])(\s+)").unwrap();
170 pub static ref LIST_MARKER_ANY_REGEX: Regex = Regex::new(r"^(\s*)(?:([*+-])|(\d+)[.)])(\s+)").unwrap();
171
172 pub static ref FENCED_CODE_BLOCK_START_REGEX: Regex = Regex::new(r"^(\s*)(```|~~~)(.*)$").unwrap();
174 pub static ref FENCED_CODE_BLOCK_END_REGEX: Regex = Regex::new(r"^(\s*)(```|~~~)(\s*)$").unwrap();
175 pub static ref INDENTED_CODE_BLOCK_REGEX: Regex = Regex::new(r"^(\s{4,})(.*)$").unwrap();
176 pub static ref CODE_FENCE_REGEX: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();
177
178 pub static ref EMPHASIS_REGEX: FancyRegex = FancyRegex::new(r"(\s|^)(\*{1,2}|_{1,2})(?=\S)(.+?)(?<=\S)(\2)(\s|$)").unwrap();
180 pub static ref SPACE_IN_EMPHASIS_REGEX: FancyRegex = FancyRegex::new(r"(\*|_)(\s+)(.+?)(\s+)(\1)").unwrap();
181
182 pub static ref ASTERISK_EMPHASIS: Regex = Regex::new(r"(?:^|[^*])\*(\s+[^*]+\s*|\s*[^*]+\s+)\*(?:[^*]|$)").unwrap();
186 pub static ref UNDERSCORE_EMPHASIS: Regex = Regex::new(r"(?:^|[^_])_(\s+[^_]+\s*|\s*[^_]+\s+)_(?:[^_]|$)").unwrap();
187 pub static ref DOUBLE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"(?:^|[^_])__(\s+[^_]+\s*|\s*[^_]+\s+)__(?:[^_]|$)").unwrap();
188 pub static ref DOUBLE_ASTERISK_EMPHASIS: FancyRegex = FancyRegex::new(r"\*\*\s+([^*]+?)\s+\*\*").unwrap();
189 pub static ref DOUBLE_ASTERISK_SPACE_START: FancyRegex = FancyRegex::new(r"\*\*\s+([^*]+?)\*\*").unwrap();
190 pub static ref DOUBLE_ASTERISK_SPACE_END: FancyRegex = FancyRegex::new(r"\*\*([^*]+?)\s+\*\*").unwrap();
191
192 pub static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap();
194 pub static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)```\s*$").unwrap();
195 pub static ref ALTERNATE_FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap();
196 pub static ref ALTERNATE_FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)~~~\s*$").unwrap();
197 pub static ref INDENTED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(\s{4,})").unwrap();
198
199 pub static ref HTML_TAG_REGEX: Regex = Regex::new(r"<([a-zA-Z][^>]*)>").unwrap();
201 pub static ref HTML_SELF_CLOSING_TAG_REGEX: Regex = Regex::new(r"<([a-zA-Z][^>]*/)>").unwrap();
202 pub static ref HTML_TAG_FINDER: Regex = Regex::new("(?i)</?[a-zA-Z][^>]*>").unwrap();
203 pub static ref HTML_TAG_QUICK_CHECK: Regex = Regex::new("(?i)</?[a-zA-Z]").unwrap();
204
205 pub static ref LINK_REFERENCE_DEFINITION_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();
207 pub static ref INLINE_LINK_REGEX: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
208 pub static ref LINK_TEXT_REGEX: Regex = Regex::new(r"\[([^\]]*)\]").unwrap();
209 pub static ref LINK_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[([^\]]*)\]\(([^)#]*)#([^)]+)\)").unwrap();
210 pub static ref EXTERNAL_URL_REGEX: FancyRegex = FancyRegex::new(r"^(https?://|ftp://|www\.|[^/]+\.[a-z]{2,})").unwrap();
211
212 pub static ref IMAGE_REGEX: Regex = Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap();
214
215 pub static ref TRAILING_WHITESPACE_REGEX: Regex = Regex::new(r"\s+$").unwrap();
217 pub static ref MULTIPLE_BLANK_LINES_REGEX: Regex = Regex::new(r"\n{3,}").unwrap();
218
219 pub static ref FRONT_MATTER_REGEX: Regex = Regex::new(r"^---\n.*?\n---\n").unwrap();
221
222 pub static ref INLINE_CODE_REGEX: FancyRegex = FancyRegex::new(r"`[^`]+`").unwrap();
224 pub static ref BOLD_ASTERISK_REGEX: Regex = Regex::new(r"\*\*(.+?)\*\*").unwrap();
225 pub static ref BOLD_UNDERSCORE_REGEX: Regex = Regex::new(r"__(.+?)__").unwrap();
226 pub static ref ITALIC_ASTERISK_REGEX: Regex = Regex::new(r"\*([^*]+?)\*").unwrap();
227 pub static ref ITALIC_UNDERSCORE_REGEX: Regex = Regex::new(r"_([^_]+?)_").unwrap();
228 pub static ref LINK_TEXT_FULL_REGEX: FancyRegex = FancyRegex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap();
229 pub static ref STRIKETHROUGH_REGEX: Regex = Regex::new(r"~~(.+?)~~").unwrap();
230 pub static ref MULTIPLE_HYPHENS: Regex = Regex::new(r"-{2,}").unwrap();
231 pub static ref TOC_SECTION_START: Regex = Regex::new(r"^#+\s*(?:Table of Contents|Contents|TOC)\s*$").unwrap();
232
233 pub static ref BLOCKQUOTE_PREFIX_RE: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
235
236 pub static ref IMAGE_REF_PATTERN: Regex = Regex::new(r"^!\[.*?\]\[.*?\]$").unwrap();
238 pub static ref LINK_REF_PATTERN: Regex = Regex::new(r"^\[.*?\]:\s*https?://\S+$").unwrap();
239 pub static ref URL_IN_TEXT: Regex = Regex::new(r"https?://\S+").unwrap();
240 pub static ref SENTENCE_END: Regex = Regex::new(r"[.!?]\s+[A-Z]").unwrap();
241 pub static ref ABBREVIATION: Regex = Regex::new(r"\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|Inc|Corp|Ltd|Co|St|Ave|Blvd|Rd|Ph\.D|M\.D|B\.A|M\.A|Ph\.D|U\.S|U\.K|U\.N|N\.Y|L\.A|D\.C)\.\s+[A-Z]").unwrap();
242 pub static ref DECIMAL_NUMBER: Regex = Regex::new(r"\d+\.\s*\d+").unwrap();
243 pub static ref LIST_ITEM: Regex = Regex::new(r"^\s*\d+\.\s+").unwrap();
244 pub static ref REFERENCE_LINK: Regex = Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap();
245
246 pub static ref EMAIL_PATTERN: Regex = Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap();
248}
249
250lazy_static! {
252 pub static ref REF_LINK_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap();
255
256 pub static ref SHORTCUT_REF_REGEX: FancyRegex = FancyRegex::new(r"(?<![\\)\]])\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\](?!\s*[\[\(])").unwrap();
261
262 pub static ref INLINE_LINK_FANCY_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[([^\]]+)\]\(([^)]+)\)").unwrap();
264
265 pub static ref INLINE_IMAGE_FANCY_REGEX: FancyRegex = FancyRegex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap();
267
268 pub static ref REF_IMAGE_REGEX: FancyRegex = FancyRegex::new(r"!\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap();
270
271 pub static ref FOOTNOTE_REF_REGEX: FancyRegex = FancyRegex::new(r"\[\^([^\]]+)\]").unwrap();
273
274 pub static ref STRIKETHROUGH_FANCY_REGEX: FancyRegex = FancyRegex::new(r"~~([^~]+)~~").unwrap();
276
277 pub static ref WIKI_LINK_REGEX: FancyRegex = FancyRegex::new(r"\[\[([^\]]+)\]\]").unwrap();
279
280 pub static ref INLINE_MATH_REGEX: FancyRegex = FancyRegex::new(r"(?<!\$)\$(?!\$)([^\$]+)\$(?!\$)").unwrap();
282 pub static ref DISPLAY_MATH_REGEX: FancyRegex = FancyRegex::new(r"\$\$([^\$]+)\$\$").unwrap();
283
284 pub static ref EMOJI_SHORTCODE_REGEX: FancyRegex = FancyRegex::new(r":([a-zA-Z0-9_+-]+):").unwrap();
286
287 pub static ref HTML_TAG_PATTERN: FancyRegex = FancyRegex::new(r"</?[a-zA-Z][^>]*>|<[a-zA-Z][^>]*/\s*>").unwrap();
289
290 pub static ref HTML_ENTITY_REGEX: FancyRegex = FancyRegex::new(r"&[a-zA-Z][a-zA-Z0-9]*;|&#\d+;|&#x[0-9a-fA-F]+;").unwrap();
292}
293
294lazy_static! {
296 pub static ref HTML_COMMENT_START: Regex = Regex::new(r"<!--").unwrap();
298 pub static ref HTML_COMMENT_END: Regex = Regex::new(r"-->").unwrap();
299 pub static ref HTML_COMMENT_PATTERN: Regex = Regex::new(r"<!--[\s\S]*?-->").unwrap();
300
301 pub static ref HTML_HEADING_PATTERN: FancyRegex = FancyRegex::new(r"^\s*<h([1-6])(?:\s[^>]*)?>.*</h\1>\s*$").unwrap();
303
304 pub static ref HEADING_CHECK: Regex = Regex::new(r"(?m)^(?:\s*)#").unwrap();
306
307 pub static ref HR_DASH: Regex = Regex::new(r"^\-{3,}\s*$").unwrap();
309 pub static ref HR_ASTERISK: Regex = Regex::new(r"^\*{3,}\s*$").unwrap();
310 pub static ref HR_UNDERSCORE: Regex = Regex::new(r"^_{3,}\s*$").unwrap();
311 pub static ref HR_SPACED_DASH: Regex = Regex::new(r"^(\-\s+){2,}\-\s*$").unwrap();
312 pub static ref HR_SPACED_ASTERISK: Regex = Regex::new(r"^(\*\s+){2,}\*\s*$").unwrap();
313 pub static ref HR_SPACED_UNDERSCORE: Regex = Regex::new(r"^(_\s+){2,}_\s*$").unwrap();
314}
315
/// Cheap pre-check: does `content` contain a `#` anywhere?
///
/// A `true` result only means a `#` is present, not that a heading exists.
pub fn has_heading_markers(content: &str) -> bool {
    content.find('#').is_some()
}
321
/// Cheap pre-check for list syntax.
///
/// Returns `true` when `content` contains `*`, `-` or `+`, or when it
/// contains both a `.` and an ASCII digit (in any order and position).
pub fn has_list_markers(content: &str) -> bool {
    let has_bullet = content.chars().any(|ch| matches!(ch, '*' | '-' | '+'));
    if has_bullet {
        return true;
    }

    let has_dot = content.contains('.');
    let has_digit = content.bytes().any(|byte| byte.is_ascii_digit());
    has_dot && has_digit
}
329
/// Cheap pre-check for code block syntax.
///
/// Returns `true` when `content` contains a triple-backtick fence, a
/// triple-tilde fence, or a newline immediately followed by a space.
pub fn has_code_block_markers(content: &str) -> bool {
    const MARKERS: [&str; 3] = ["```", "~~~", "\n "];
    MARKERS.iter().any(|marker| content.contains(*marker))
}
335
/// Cheap pre-check: does `content` contain `*` or `_` anywhere?
pub fn has_emphasis_markers(content: &str) -> bool {
    content.chars().any(|ch| matches!(ch, '*' | '_'))
}
340
/// Cheap pre-check for HTML: `content` must contain both `<` and `>`.
///
/// The two characters may appear in any order. A self-closing `/>` needs no
/// separate test because it already contains `>`.
pub fn has_html_tags(content: &str) -> bool {
    let has_open = content.contains('<');
    let has_close = content.contains('>');
    has_open && has_close
}
345
/// Cheap pre-check for link syntax.
///
/// Returns `true` when `content` contains both `[` and `]` (in any order),
/// or any of the literal prefixes `http://`, `https://`, `ftp://`.
pub fn has_link_markers(content: &str) -> bool {
    const SCHEMES: [&str; 3] = ["http://", "https://", "ftp://"];

    let has_brackets = content.contains('[') && content.contains(']');
    has_brackets || SCHEMES.iter().any(|scheme| content.contains(*scheme))
}
353
/// Cheap pre-check: does `content` contain the image opener `![`?
pub fn has_image_markers(content: &str) -> bool {
    content.find("![").is_some()
}
358
/// Regex-free heuristic for "does `content` look like it contains a URL?".
///
/// Returns `true` when some occurrence of `htt` or `ftp` is followed closely
/// by `://`: the separator may start at most 10 characters after the start
/// of that prefix. Returns `false` immediately when `content` contains no
/// `://` at all.
///
/// The scan works directly on the string slice; it does not copy the input
/// into a temporary character buffer.
pub fn contains_url(content: &str) -> bool {
    // Cheap rejection: no separator means no URL.
    if !content.contains("://") {
        return false;
    }

    // The separator is 3 characters long and may start at character offsets
    // 0..=10 from the prefix, so it always lies within the first 13
    // characters counted from the prefix start.
    const WINDOW_CHARS: usize = 13;

    // Neither "htt" nor "ftp" can overlap itself, so the non-overlapping
    // `match_indices` scan visits every occurrence.
    ["htt", "ftp"].iter().any(|prefix| {
        content.match_indices(*prefix).any(|(start, _)| {
            let tail = &content[start..];
            // Byte offset where the character window ends (or end of input).
            let window_end = tail
                .char_indices()
                .nth(WINDOW_CHARS)
                .map_or(tail.len(), |(offset, _)| offset);
            tail[..window_end].contains("://")
        })
    })
}
395
/// Return a copy of `s` with a backslash inserted before each of
/// `. + * ? ^ $ ( ) [ ] { } | \`.
///
/// All other characters are copied through unchanged.
pub fn escape_regex(s: &str) -> String {
    // Worst case every character gains a backslash.
    let buffer = String::with_capacity(s.len() * 2);

    s.chars().fold(buffer, |mut escaped, ch| {
        let is_special = matches!(
            ch,
            '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '\\'
        );
        if is_special {
            escaped.push('\\');
        }
        escaped.push(ch);
        escaped
    })
}
410
/// Unit tests for the regex cache, the shared static patterns and the
/// string pre-check helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_regex_cache_new() {
        let cache = RegexCache::new();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_regex_cache_default() {
        let cache = RegexCache::default();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_get_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles and stores the pattern.
        let regex1 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&1));

        // Second request is served from the cache.
        let regex2 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_regex(r"[unterminated");
        assert!(result.is_err());
        assert!(cache.cache.is_empty());
    }

    #[test]
    fn test_get_fancy_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles and stores the pattern.
        let regex1 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&1));

        // Second request is served from the cache.
        let regex2 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_fancy_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_fancy_regex(r"(?<=invalid");
        assert!(result.is_err());
        assert!(cache.fancy_cache.is_empty());
    }

    #[test]
    fn test_get_stats() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\w+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        let stats = cache.get_stats();
        assert_eq!(stats.get(r"\d+"), Some(&2));
        assert_eq!(stats.get(r"\w+"), Some(&1));
        assert_eq!(stats.get(r"(?<=foo)bar"), Some(&1));
    }

    #[test]
    fn test_clear_cache() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        assert!(!cache.cache.is_empty());
        assert!(!cache.fancy_cache.is_empty());
        assert!(!cache.usage_stats.is_empty());

        cache.clear();

        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_global_cache_functions() {
        let regex1 = get_cached_regex(r"\d{3}").unwrap();
        let regex2 = get_cached_regex(r"\d{3}").unwrap();
        assert!(Arc::ptr_eq(&regex1, &regex2));

        let fancy1 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        let fancy2 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        assert!(Arc::ptr_eq(&fancy1, &fancy2));

        let stats = get_cache_stats();
        assert!(stats.contains_key(r"\d{3}"));
        assert!(stats.contains_key(r"(?<=test)ing"));
    }

    #[test]
    fn test_regex_lazy_macro() {
        let re = regex_lazy!(r"^test.*end$");
        assert!(re.is_match("test something end"));
        assert!(!re.is_match("test something"));

        // A second expansion gets its own independent static.
        let re2 = regex_lazy!(r"^start.*finish$");
        assert!(re2.is_match("start and finish"));
        assert!(!re2.is_match("start without end"));
    }

    #[test]
    fn test_has_heading_markers() {
        assert!(has_heading_markers("# Heading"));
        assert!(has_heading_markers("Text with # symbol"));
        assert!(!has_heading_markers("Text without heading marker"));
    }

    #[test]
    fn test_has_list_markers() {
        assert!(has_list_markers("* Item"));
        assert!(has_list_markers("- Item"));
        assert!(has_list_markers("+ Item"));
        assert!(has_list_markers("1. Item"));
        assert!(!has_list_markers("Text without list markers"));
    }

    #[test]
    fn test_has_code_block_markers() {
        assert!(has_code_block_markers("```code```"));
        assert!(has_code_block_markers("~~~code~~~"));
        assert!(has_code_block_markers("Text\n indented code"));
        assert!(!has_code_block_markers("Text without code blocks"));
    }

    #[test]
    fn test_has_emphasis_markers() {
        assert!(has_emphasis_markers("*emphasis*"));
        assert!(has_emphasis_markers("_emphasis_"));
        assert!(has_emphasis_markers("**bold**"));
        assert!(has_emphasis_markers("__bold__"));
        assert!(!has_emphasis_markers("no emphasis"));
    }

    #[test]
    fn test_has_html_tags() {
        assert!(has_html_tags("<div>content</div>"));
        assert!(has_html_tags("<br/>"));
        assert!(has_html_tags("<img src='test.jpg'>"));
        assert!(!has_html_tags("no html tags"));
        assert!(!has_html_tags("less than < but no tag"));
    }

    #[test]
    fn test_has_link_markers() {
        assert!(has_link_markers("[text](url)"));
        assert!(has_link_markers("[reference][1]"));
        assert!(has_link_markers("http://example.com"));
        assert!(has_link_markers("https://example.com"));
        assert!(has_link_markers("ftp://example.com"));
        assert!(!has_link_markers("no links here"));
    }

    #[test]
    fn test_has_image_markers() {
        assert!(has_image_markers("![alt text](image.png)"));
        assert!(has_image_markers("![](image.png)"));
        assert!(!has_image_markers("[link](url)"));
        assert!(!has_image_markers("no images"));
    }

    #[test]
    fn test_contains_url() {
        assert!(contains_url("http://example.com"));
        assert!(contains_url("Text with https://example.com link"));
        assert!(contains_url("ftp://example.com"));
        assert!(!contains_url("Text without URL"));
        assert!(!contains_url("http not followed by ://"));

        // Edge cases
        assert!(!contains_url("http"));
        assert!(!contains_url("https"));
        assert!(!contains_url("://"));
        assert!(contains_url("Visit http://site.com now"));
        assert!(contains_url("See https://secure.site.com/path"));
    }

    #[test]
    fn test_contains_url_performance() {
        // Long text without any URL.
        let long_text = "a".repeat(10000);
        assert!(!contains_url(&long_text));

        // Same text with a URL appended at the very end.
        let text_with_url = format!("{long_text}https://example.com");
        assert!(contains_url(&text_with_url));
    }

    #[test]
    fn test_escape_regex() {
        assert_eq!(escape_regex("a.b"), "a\\.b");
        assert_eq!(escape_regex("a+b*c"), "a\\+b\\*c");
        assert_eq!(escape_regex("(test)"), "\\(test\\)");
        assert_eq!(escape_regex("[a-z]"), "\\[a-z\\]");
        assert_eq!(escape_regex("normal text"), "normal text");

        // Every special character at once.
        assert_eq!(escape_regex(".$^{[(|)*+?\\"), "\\.\\$\\^\\{\\[\\(\\|\\)\\*\\+\\?\\\\");

        // Empty input.
        assert_eq!(escape_regex(""), "");

        // Mixed content.
        assert_eq!(escape_regex("test.com/path?query=1"), "test\\.com/path\\?query=1");
    }

    #[test]
    fn test_static_regex_patterns() {
        // URLs
        assert!(URL_REGEX.is_match("https://example.com"));
        assert!(URL_REGEX.is_match("http://test.org/path"));
        assert!(URL_REGEX.is_match("ftp://files.com"));
        assert!(!URL_REGEX.is_match("not a url"));

        // Headings
        assert!(ATX_HEADING_REGEX.is_match("# Heading"));
        assert!(ATX_HEADING_REGEX.is_match(" ## Indented"));
        assert!(ATX_HEADING_REGEX.is_match("### "));
        assert!(!ATX_HEADING_REGEX.is_match("Not a heading"));

        // List markers
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("* Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("- Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("+ Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("1. Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("99. Item"));

        // Code fences
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```rust"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("~~~"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("~~~"));

        // Emphasis
        assert!(BOLD_ASTERISK_REGEX.is_match("**bold**"));
        assert!(BOLD_UNDERSCORE_REGEX.is_match("__bold__"));
        assert!(ITALIC_ASTERISK_REGEX.is_match("*italic*"));
        assert!(ITALIC_UNDERSCORE_REGEX.is_match("_italic_"));

        // HTML
        assert!(HTML_TAG_REGEX.is_match("<div>"));
        assert!(HTML_TAG_REGEX.is_match("<span class='test'>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<br/>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<img src='test'/>"));

        // Whitespace
        assert!(TRAILING_WHITESPACE_REGEX.is_match("line with spaces "));
        assert!(TRAILING_WHITESPACE_REGEX.is_match("tabs\t\t"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n\n"));

        // Blockquotes
        assert!(BLOCKQUOTE_PREFIX_RE.is_match("> Quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(" > Indented quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(">> Nested"));
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        // Ten threads hit the global cache concurrently, each with its own
        // pattern `\d{i}` matched against a string of `i` digits.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                thread::spawn(move || {
                    let pattern = format!(r"\d{{{i}}}");
                    let regex = get_cached_regex(&pattern).unwrap();
                    assert!(regex.is_match(&"1".repeat(i)));
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }
    }
}