rumdl_lib/utils/
regex_cache.rs1use fancy_regex::Regex as FancyRegex;
23use lazy_static::lazy_static;
24use regex::Regex;
25use std::collections::HashMap;
26use std::sync::{Arc, Mutex};
27
/// Cache of compiled regular expressions keyed by their pattern source text.
///
/// Patterns compiled with the `regex` crate and with `fancy_regex` live in
/// separate maps; a third map counts how often each pattern string has been
/// requested.
#[derive(Debug)]
pub struct RegexCache {
    /// Compiled `regex::Regex` instances, keyed by pattern string.
    cache: HashMap<String, Arc<Regex>>,
    /// Compiled `fancy_regex::Regex` instances, keyed by pattern string.
    fancy_cache: HashMap<String, Arc<FancyRegex>>,
    /// Request counter per pattern string.
    usage_stats: HashMap<String, u64>,
}
35
36impl Default for RegexCache {
37 fn default() -> Self {
38 Self::new()
39 }
40}
41
42impl RegexCache {
43 pub fn new() -> Self {
44 Self {
45 cache: HashMap::new(),
46 fancy_cache: HashMap::new(),
47 usage_stats: HashMap::new(),
48 }
49 }
50
51 pub fn get_regex(&mut self, pattern: &str) -> Result<Arc<Regex>, regex::Error> {
53 if let Some(regex) = self.cache.get(pattern) {
54 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
55 return Ok(regex.clone());
56 }
57
58 let regex = Arc::new(Regex::new(pattern)?);
59 self.cache.insert(pattern.to_string(), regex.clone());
60 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
61 Ok(regex)
62 }
63
64 pub fn get_fancy_regex(&mut self, pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
66 if let Some(regex) = self.fancy_cache.get(pattern) {
67 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
68 return Ok(regex.clone());
69 }
70
71 match FancyRegex::new(pattern) {
72 Ok(regex) => {
73 let arc_regex = Arc::new(regex);
74 self.fancy_cache.insert(pattern.to_string(), arc_regex.clone());
75 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
76 Ok(arc_regex)
77 }
78 Err(e) => Err(Box::new(e)),
79 }
80 }
81
82 pub fn get_stats(&self) -> HashMap<String, u64> {
84 self.usage_stats.clone()
85 }
86
87 pub fn clear(&mut self) {
89 self.cache.clear();
90 self.fancy_cache.clear();
91 self.usage_stats.clear();
92 }
93}
94
95lazy_static! {
96 static ref GLOBAL_REGEX_CACHE: Arc<Mutex<RegexCache>> = Arc::new(Mutex::new(RegexCache::new()));
98}
99
100pub fn get_cached_regex(pattern: &str) -> Result<Arc<Regex>, regex::Error> {
102 let mut cache = GLOBAL_REGEX_CACHE.lock().unwrap();
103 cache.get_regex(pattern)
104}
105
106pub fn get_cached_fancy_regex(pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
108 let mut cache = GLOBAL_REGEX_CACHE.lock().unwrap();
109 cache.get_fancy_regex(pattern)
110}
111
112pub fn get_cache_stats() -> HashMap<String, u64> {
114 let cache = GLOBAL_REGEX_CACHE.lock().unwrap();
115 cache.get_stats()
116}
117
/// Expands to a `&regex::Regex` backed by a `lazy_static` static declared at
/// the call site, so each expansion owns its own compiled pattern.
///
/// The expansion refers to `lazy_static` and `regex` by crate path, so both
/// must be resolvable where the macro is used.
///
/// # Panics
///
/// The initializer calls `unwrap()` on the compilation result, so an invalid
/// pattern panics when the static is initialized.
#[macro_export]
macro_rules! regex_lazy {
    ($pattern:expr) => {{
        lazy_static::lazy_static! {
            static ref REGEX: regex::Regex = regex::Regex::new($pattern).unwrap();
        }
        &*REGEX
    }};
}
135
/// Fetches `$pattern` through `get_cached_regex`, yielding the cached
/// `Arc<Regex>`.
///
/// # Panics
///
/// Panics with "Failed to compile regex" when the pattern does not compile.
#[macro_export]
macro_rules! regex_cached {
    ($pattern:expr) => {{ $crate::utils::regex_cache::get_cached_regex($pattern).expect("Failed to compile regex") }};
}
141
/// Fetches `$pattern` through `get_cached_fancy_regex`, yielding the cached
/// `Arc<FancyRegex>`.
///
/// # Panics
///
/// Panics with "Failed to compile fancy regex" when the pattern does not
/// compile.
#[macro_export]
macro_rules! fancy_regex_cached {
    ($pattern:expr) => {{ $crate::utils::regex_cache::get_cached_fancy_regex($pattern).expect("Failed to compile fancy regex") }};
}
147
148pub use crate::regex_lazy;
150
// Pre-compiled patterns for common Markdown constructs. Each static is
// compiled on first access; the `unwrap()` calls rely on the literals being
// valid patterns.
lazy_static! {
    // URLs: http, https or ftp scheme. The three variants differ in which
    // delimiter characters end the URL and which trailing punctuation is excluded.
    pub static ref URL_REGEX: Regex = Regex::new(r#"(?:https?|ftp)://[^\s<>\[\]()'"]+[^\s<>\[\]()"'.,]"#).unwrap();
    pub static ref BARE_URL_REGEX: Regex = Regex::new(r"(?:https?|ftp)://[^\s<>]+[^\s<>.]").unwrap();
    pub static ref URL_PATTERN: Regex = Regex::new(r"((?:https?|ftp)://[^\s\)<>]+[^\s\)<>.,])").unwrap();

    // Headings: ATX (`#` prefixed, optionally closed with trailing `#`s) and
    // setext (text line followed by a line of `=` or `-`).
    pub static ref ATX_HEADING_REGEX: Regex = Regex::new(r"^(\s*)(#{1,6})(\s+|$)").unwrap();
    pub static ref CLOSED_ATX_HEADING_REGEX: Regex = Regex::new(r"^(\s*)(#{1,6})(\s+)(.*)(\s+)(#+)(\s*)$").unwrap();
    pub static ref SETEXT_HEADING_REGEX: Regex = Regex::new(r"^(\s*)[^\s]+.*\n(\s*)(=+|-+)\s*$").unwrap();
    pub static ref TRAILING_PUNCTUATION_REGEX: Regex = Regex::new(r"[.,:;!?]$").unwrap();

    // Heading variants that capture the heading text. The setext form needs
    // `fancy_regex` because it uses a backreference for the underline character.
    pub static ref ATX_HEADING_WITH_CAPTURE: Regex = Regex::new(r"^(#{1,6})\s+(.+?)(?:\s+#*\s*)?$").unwrap();
    pub static ref SETEXT_HEADING_WITH_CAPTURE: FancyRegex = FancyRegex::new(r"^([^\n]+)\n([=\-])\2+\s*$").unwrap();

    // List markers: bullets (`*`, `+`, `-`) and numbers followed by `.` or `)`.
    pub static ref UNORDERED_LIST_MARKER_REGEX: Regex = Regex::new(r"^(\s*)([*+-])(\s+)").unwrap();
    pub static ref ORDERED_LIST_MARKER_REGEX: Regex = Regex::new(r"^(\s*)(\d+)([.)])(\s+)").unwrap();
    pub static ref LIST_MARKER_ANY_REGEX: Regex = Regex::new(r"^(\s*)(?:([*+-])|(\d+)[.)])(\s+)").unwrap();

    // Code blocks: backtick/tilde fences and lines indented by four or more
    // whitespace characters.
    pub static ref FENCED_CODE_BLOCK_START_REGEX: Regex = Regex::new(r"^(\s*)(```|~~~)(.*)$").unwrap();
    pub static ref FENCED_CODE_BLOCK_END_REGEX: Regex = Regex::new(r"^(\s*)(```|~~~)(\s*)$").unwrap();
    pub static ref INDENTED_CODE_BLOCK_REGEX: Regex = Regex::new(r"^(\s{4,})(.*)$").unwrap();
    pub static ref CODE_FENCE_REGEX: Regex = Regex::new(r"^(`{3,}|~{3,})").unwrap();

    // Emphasis: the fancy variants use look-around and backreferences so the
    // closing marker must equal the opening one.
    pub static ref EMPHASIS_REGEX: FancyRegex = FancyRegex::new(r"(\s|^)(\*{1,2}|_{1,2})(?=\S)(.+?)(?<=\S)(\2)(\s|$)").unwrap();
    pub static ref SPACE_IN_EMPHASIS_REGEX: FancyRegex = FancyRegex::new(r"(\*|_)(\s+)(.+?)(\s+)(\1)").unwrap();

    // Emphasis spans whose content has whitespace directly inside the markers.
    pub static ref ASTERISK_EMPHASIS: Regex = Regex::new(r"(?:^|[^*])\*(\s+[^*]+\s*|\s*[^*]+\s+)\*(?:[^*]|$)").unwrap();
    pub static ref UNDERSCORE_EMPHASIS: Regex = Regex::new(r"(?:^|[^_])_(\s+[^_]+\s*|\s*[^_]+\s+)_(?:[^_]|$)").unwrap();
    pub static ref DOUBLE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"(?:^|[^_])__(\s+[^_]+\s*|\s*[^_]+\s+)__(?:[^_]|$)").unwrap();
    pub static ref DOUBLE_ASTERISK_EMPHASIS: FancyRegex = FancyRegex::new(r"\*\*\s+([^*]+?)\s+\*\*").unwrap();
    pub static ref DOUBLE_ASTERISK_SPACE_START: FancyRegex = FancyRegex::new(r"\*\*\s+([^*]+?)\*\*").unwrap();
    pub static ref DOUBLE_ASTERISK_SPACE_END: FancyRegex = FancyRegex::new(r"\*\*([^*]+?)\s+\*\*").unwrap();

    // Fence start/end split by fence character (exactly three backticks or
    // tildes), plus the indentation-only form of the indented code pattern.
    pub static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap();
    pub static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)```\s*$").unwrap();
    pub static ref ALTERNATE_FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap();
    pub static ref ALTERNATE_FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)~~~\s*$").unwrap();
    pub static ref INDENTED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(\s{4,})").unwrap();

    // HTML tags. `HTML_TAG_QUICK_CHECK` only looks for a tag opening (`<` or
    // `</` followed by a letter) and does not require the closing `>`.
    pub static ref HTML_TAG_REGEX: Regex = Regex::new(r"<([a-zA-Z][^>]*)>").unwrap();
    pub static ref HTML_SELF_CLOSING_TAG_REGEX: Regex = Regex::new(r"<([a-zA-Z][^>]*/)>").unwrap();
    pub static ref HTML_TAG_FINDER: Regex = Regex::new("(?i)</?[a-zA-Z][^>]*>").unwrap();
    pub static ref HTML_TAG_QUICK_CHECK: Regex = Regex::new("(?i)</?[a-zA-Z]").unwrap();

    // Links. `LINK_REGEX` only matches unescaped inline links whose target
    // contains a `#` fragment; `EXTERNAL_URL_REGEX` tests the start of a target.
    pub static ref LINK_REFERENCE_DEFINITION_REGEX: Regex = Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap();
    pub static ref INLINE_LINK_REGEX: Regex = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
    pub static ref LINK_TEXT_REGEX: Regex = Regex::new(r"\[([^\]]*)\]").unwrap();
    pub static ref LINK_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[([^\]]*)\]\(([^)#]*)#([^)]+)\)").unwrap();
    pub static ref EXTERNAL_URL_REGEX: FancyRegex = FancyRegex::new(r"^(https?://|ftp://|www\.|[^/]+\.[a-z]{2,})").unwrap();

    // Inline images: `![alt](target)`.
    pub static ref IMAGE_REGEX: Regex = Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap();

    // Whitespace: trailing whitespace and runs of three or more newlines.
    pub static ref TRAILING_WHITESPACE_REGEX: Regex = Regex::new(r"\s+$").unwrap();
    pub static ref MULTIPLE_BLANK_LINES_REGEX: Regex = Regex::new(r"\n{3,}").unwrap();

    // Front matter delimited by `---` lines at the start of the input.
    // NOTE(review): without the `s` flag `.` does not match `\n`, so this only
    // matches a front matter whose body is a single line — confirm intent.
    pub static ref FRONT_MATTER_REGEX: Regex = Regex::new(r"^---\n.*?\n---\n").unwrap();

    // Inline formatting spans and miscellaneous heading-text patterns.
    pub static ref INLINE_CODE_REGEX: FancyRegex = FancyRegex::new(r"`[^`]+`").unwrap();
    pub static ref BOLD_ASTERISK_REGEX: Regex = Regex::new(r"\*\*(.+?)\*\*").unwrap();
    pub static ref BOLD_UNDERSCORE_REGEX: Regex = Regex::new(r"__(.+?)__").unwrap();
    pub static ref ITALIC_ASTERISK_REGEX: Regex = Regex::new(r"\*([^*]+?)\*").unwrap();
    pub static ref ITALIC_UNDERSCORE_REGEX: Regex = Regex::new(r"_([^_]+?)_").unwrap();
    pub static ref LINK_TEXT_FULL_REGEX: FancyRegex = FancyRegex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap();
    pub static ref STRIKETHROUGH_REGEX: Regex = Regex::new(r"~~(.+?)~~").unwrap();
    pub static ref MULTIPLE_HYPHENS: Regex = Regex::new(r"-{2,}").unwrap();
    pub static ref TOC_SECTION_START: Regex = Regex::new(r"^#+\s*(?:Table of Contents|Contents|TOC)\s*$").unwrap();

    // Leading blockquote markers, including surrounding whitespace.
    pub static ref BLOCKQUOTE_PREFIX_RE: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();

    // Whole-line and in-text patterns: reference images/links, URLs, sentence
    // boundaries, abbreviations, decimals, numbered list items.
    pub static ref IMAGE_REF_PATTERN: Regex = Regex::new(r"^!\[.*?\]\[.*?\]$").unwrap();
    pub static ref LINK_REF_PATTERN: Regex = Regex::new(r"^\[.*?\]:\s*https?://\S+$").unwrap();
    pub static ref URL_IN_TEXT: Regex = Regex::new(r"https?://\S+").unwrap();
    pub static ref SENTENCE_END: Regex = Regex::new(r"[.!?]\s+[A-Z]").unwrap();
    pub static ref ABBREVIATION: Regex = Regex::new(r"\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|Inc|Corp|Ltd|Co|St|Ave|Blvd|Rd|Ph\.D|M\.D|B\.A|M\.A|Ph\.D|U\.S|U\.K|U\.N|N\.Y|L\.A|D\.C)\.\s+[A-Z]").unwrap();
    pub static ref DECIMAL_NUMBER: Regex = Regex::new(r"\d+\.\s*\d+").unwrap();
    pub static ref LIST_ITEM: Regex = Regex::new(r"^\s*\d+\.\s+").unwrap();
    pub static ref REFERENCE_LINK: Regex = Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap();

    // Email-address-shaped text: local part, `@`, domain, and a TLD of two or
    // more letters.
    pub static ref EMAIL_PATTERN: Regex = Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap();
}
249
// Inline-element patterns compiled with `fancy_regex`; several rely on
// look-behind / look-ahead, which the plain `regex` crate does not support.
lazy_static! {
    // Reference link `[text][ref]` not preceded by a backslash. The text may
    // contain backslash escapes and one level of nested brackets.
    pub static ref REF_LINK_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap();

    // Shortcut reference `[text]`: not preceded by `\`, `)` or `]`, and not
    // followed (after optional whitespace) by `[` or `(`.
    pub static ref SHORTCUT_REF_REGEX: FancyRegex = FancyRegex::new(r"(?<![\\)\]])\[([^\]]+)\](?!\s*[\[\(])").unwrap();

    // Inline link `[text](target)` not preceded by a backslash.
    pub static ref INLINE_LINK_FANCY_REGEX: FancyRegex = FancyRegex::new(r"(?<!\\)\[([^\]]+)\]\(([^)]+)\)").unwrap();

    // Inline image `![alt](target)`.
    pub static ref INLINE_IMAGE_FANCY_REGEX: FancyRegex = FancyRegex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap();

    // Reference image `![alt][ref]`, with the same alt-text rules as
    // `REF_LINK_REGEX` uses for link text.
    pub static ref REF_IMAGE_REGEX: FancyRegex = FancyRegex::new(r"!\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap();

    // Footnote reference `[^label]`.
    pub static ref FOOTNOTE_REF_REGEX: FancyRegex = FancyRegex::new(r"\[\^([^\]]+)\]").unwrap();

    // Strikethrough `~~text~~` whose content contains no `~`.
    pub static ref STRIKETHROUGH_FANCY_REGEX: FancyRegex = FancyRegex::new(r"~~([^~]+)~~").unwrap();

    // Wiki-style link `[[target]]`.
    pub static ref WIKI_LINK_REGEX: FancyRegex = FancyRegex::new(r"\[\[([^\]]+)\]\]").unwrap();

    // Math: single-dollar inline spans (neither delimiter adjacent to another
    // `$`) and double-dollar display spans.
    pub static ref INLINE_MATH_REGEX: FancyRegex = FancyRegex::new(r"(?<!\$)\$(?!\$)([^\$]+)\$(?!\$)").unwrap();
    pub static ref DISPLAY_MATH_REGEX: FancyRegex = FancyRegex::new(r"\$\$([^\$]+)\$\$").unwrap();

    // Emoji shortcode `:name:`.
    pub static ref EMOJI_SHORTCODE_REGEX: FancyRegex = FancyRegex::new(r":([a-zA-Z0-9_+-]+):").unwrap();

    // Opening, closing or self-closing HTML tag.
    pub static ref HTML_TAG_PATTERN: FancyRegex = FancyRegex::new(r"</?[a-zA-Z][^>]*>|<[a-zA-Z][^>]*/\s*>").unwrap();

    // HTML entity: named (`&amp;`), decimal (`&#123;`) or hexadecimal (`&#x1F;`).
    pub static ref HTML_ENTITY_REGEX: FancyRegex = FancyRegex::new(r"&[a-zA-Z][a-zA-Z0-9]*;|&#\d+;|&#x[0-9a-fA-F]+;").unwrap();
}
292
// HTML comment, HTML heading, heading pre-check and horizontal-rule patterns.
lazy_static! {
    // HTML comment delimiters and a complete (possibly multi-line) comment.
    pub static ref HTML_COMMENT_START: Regex = Regex::new(r"<!--").unwrap();
    pub static ref HTML_COMMENT_END: Regex = Regex::new(r"-->").unwrap();
    pub static ref HTML_COMMENT_PATTERN: Regex = Regex::new(r"<!--[\s\S]*?-->").unwrap();

    // A line consisting of one `<hN>…</hN>` element; the backreference forces
    // the closing tag level to equal the opening one.
    pub static ref HTML_HEADING_PATTERN: FancyRegex = FancyRegex::new(r"^\s*<h([1-6])(?:\s[^>]*)?>.*</h\1>\s*$").unwrap();

    // Multi-line check: some line starts with optional whitespace then `#`.
    pub static ref HEADING_CHECK: Regex = Regex::new(r"(?m)^(?:\s*)#").unwrap();

    // Horizontal rules: three or more `-`, `*` or `_`, either contiguous or
    // separated by whitespace, with optional trailing whitespace.
    pub static ref HR_DASH: Regex = Regex::new(r"^\-{3,}\s*$").unwrap();
    pub static ref HR_ASTERISK: Regex = Regex::new(r"^\*{3,}\s*$").unwrap();
    pub static ref HR_UNDERSCORE: Regex = Regex::new(r"^_{3,}\s*$").unwrap();
    pub static ref HR_SPACED_DASH: Regex = Regex::new(r"^(\-\s+){2,}\-\s*$").unwrap();
    pub static ref HR_SPACED_ASTERISK: Regex = Regex::new(r"^(\*\s+){2,}\*\s*$").unwrap();
    pub static ref HR_SPACED_UNDERSCORE: Regex = Regex::new(r"^(_\s+){2,}_\s*$").unwrap();
}
314
/// Cheap pre-check: reports whether `content` contains a `#` anywhere.
///
/// A `true` result only means a heading is possible, not that one exists.
pub fn has_heading_markers(content: &str) -> bool {
    content.find('#').is_some()
}
320
/// Cheap pre-check for list syntax.
///
/// Returns `true` when `content` contains a bullet character (`*`, `-`, `+`)
/// or contains both a `.` and an ASCII digit somewhere (not necessarily
/// adjacent).
pub fn has_list_markers(content: &str) -> bool {
    let has_bullet = content.chars().any(|c| matches!(c, '*' | '-' | '+'));
    if has_bullet {
        return true;
    }

    let has_dot = content.contains('.');
    has_dot && content.chars().any(|c| c.is_ascii_digit())
}
328
/// Cheap pre-check for code blocks: a backtick fence, a tilde fence, or a
/// newline immediately followed by the indentation literal below.
pub fn has_code_block_markers(content: &str) -> bool {
    const MARKERS: [&str; 3] = ["```", "~~~", "\n "];
    MARKERS.iter().any(|marker| content.contains(*marker))
}
334
/// Cheap pre-check: reports whether `content` contains `*` or `_`.
pub fn has_emphasis_markers(content: &str) -> bool {
    content.chars().any(|c| matches!(c, '*' | '_'))
}
339
/// Cheap pre-check for HTML: `content` contains both a `<` and a `>`.
///
/// The characters may appear in any order; this does not verify that they
/// form a tag.
pub fn has_html_tags(content: &str) -> bool {
    // A `/>` necessarily contains `>`, so testing for `>` alone covers
    // self-closing tags as well.
    let has_open = content.contains('<');
    let has_close = content.contains('>');
    has_open && has_close
}
344
/// Cheap pre-check for links: either both `[` and `]` are present, or the
/// text contains an `http://`, `https://` or `ftp://` scheme prefix.
pub fn has_link_markers(content: &str) -> bool {
    let has_brackets = content.contains('[') && content.contains(']');
    if has_brackets {
        return true;
    }

    ["http://", "https://", "ftp://"]
        .iter()
        .any(|scheme| content.contains(*scheme))
}
352
/// Cheap pre-check for images: `content` contains the `![` opener.
pub fn has_image_markers(content: &str) -> bool {
    content.find("![").is_some()
}
357
/// Heuristic URL detector that avoids running a regex.
///
/// Returns `true` when some occurrence of `htt` or `ftp` is followed by
/// `://` starting within the next ten characters (counted in `char`s from the
/// start of that occurrence). Text without any `://` is rejected immediately.
pub fn contains_url(content: &str) -> bool {
    // Fast rejection: no scheme separator anywhere.
    if !content.contains("://") {
        return false;
    }

    let chars: Vec<char> = content.chars().collect();

    chars.windows(3).enumerate().any(|(start, prefix)| {
        let is_scheme_start = matches!(prefix, ['h', 't', 't'] | ['f', 't', 'p']);

        // Inspect the eleven three-character windows beginning at offsets
        // `start ..= start + 10`, stopping early at the end of the text.
        is_scheme_start
            && chars[start..]
                .windows(3)
                .take(11)
                .any(|sep| matches!(sep, [':', '/', '/']))
    })
}
394
/// Returns `s` with a backslash inserted before each of the characters
/// `. + * ? ^ $ ( ) [ ] { } | \`; every other character is copied through
/// unchanged.
pub fn escape_regex(s: &str) -> String {
    // Worst case every character needs an escape, doubling the length.
    let buffer = String::with_capacity(s.len() * 2);

    s.chars().fold(buffer, |mut escaped, ch| {
        let needs_escape = matches!(
            ch,
            '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '\\'
        );
        if needs_escape {
            escaped.push('\\');
        }
        escaped.push(ch);
        escaped
    })
}
409
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_regex_cache_new() {
        let cache = RegexCache::new();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_regex_cache_default() {
        let cache = RegexCache::default();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_get_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles and stores the pattern.
        let regex1 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&1));

        // Second request is served from the cache.
        let regex2 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_regex(r"[unterminated");
        assert!(result.is_err());
        assert!(cache.cache.is_empty());
    }

    #[test]
    fn test_get_fancy_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles and stores the pattern.
        let regex1 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&1));

        // Second request is served from the cache.
        let regex2 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_fancy_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_fancy_regex(r"(?<=invalid");
        assert!(result.is_err());
        assert!(cache.fancy_cache.is_empty());
    }

    #[test]
    fn test_get_stats() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\w+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        let stats = cache.get_stats();
        assert_eq!(stats.get(r"\d+"), Some(&2));
        assert_eq!(stats.get(r"\w+"), Some(&1));
        assert_eq!(stats.get(r"(?<=foo)bar"), Some(&1));
    }

    #[test]
    fn test_clear_cache() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        assert!(!cache.cache.is_empty());
        assert!(!cache.fancy_cache.is_empty());
        assert!(!cache.usage_stats.is_empty());

        cache.clear();

        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_global_cache_functions() {
        let regex1 = get_cached_regex(r"\d{3}").unwrap();
        let regex2 = get_cached_regex(r"\d{3}").unwrap();
        assert!(Arc::ptr_eq(&regex1, &regex2));

        let fancy1 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        let fancy2 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        assert!(Arc::ptr_eq(&fancy1, &fancy2));

        let stats = get_cache_stats();
        assert!(stats.contains_key(r"\d{3}"));
        assert!(stats.contains_key(r"(?<=test)ing"));
    }

    #[test]
    fn test_regex_lazy_macro() {
        let re = regex_lazy!(r"^test.*end$");
        assert!(re.is_match("test something end"));
        assert!(!re.is_match("test something"));

        // A second expansion declares its own independent static.
        let re2 = regex_lazy!(r"^start.*finish$");
        assert!(re2.is_match("start and finish"));
        assert!(!re2.is_match("start without end"));
    }

    #[test]
    fn test_has_heading_markers() {
        assert!(has_heading_markers("# Heading"));
        assert!(has_heading_markers("Text with # symbol"));
        assert!(!has_heading_markers("Text without heading marker"));
    }

    #[test]
    fn test_has_list_markers() {
        assert!(has_list_markers("* Item"));
        assert!(has_list_markers("- Item"));
        assert!(has_list_markers("+ Item"));
        assert!(has_list_markers("1. Item"));
        assert!(!has_list_markers("Text without list markers"));
    }

    #[test]
    fn test_has_code_block_markers() {
        assert!(has_code_block_markers("```code```"));
        assert!(has_code_block_markers("~~~code~~~"));
        assert!(has_code_block_markers("Text\n indented code"));
        assert!(!has_code_block_markers("Text without code blocks"));
    }

    #[test]
    fn test_has_emphasis_markers() {
        assert!(has_emphasis_markers("*emphasis*"));
        assert!(has_emphasis_markers("_emphasis_"));
        assert!(has_emphasis_markers("**bold**"));
        assert!(has_emphasis_markers("__bold__"));
        assert!(!has_emphasis_markers("no emphasis"));
    }

    #[test]
    fn test_has_html_tags() {
        assert!(has_html_tags("<div>content</div>"));
        assert!(has_html_tags("<br/>"));
        assert!(has_html_tags("<img src='test.jpg'>"));
        assert!(!has_html_tags("no html tags"));
        assert!(!has_html_tags("less than < but no tag"));
    }

    #[test]
    fn test_has_link_markers() {
        assert!(has_link_markers("[text](url)"));
        assert!(has_link_markers("[reference][1]"));
        assert!(has_link_markers("http://example.com"));
        assert!(has_link_markers("https://example.com"));
        assert!(has_link_markers("ftp://example.com"));
        assert!(!has_link_markers("no links here"));
    }

    #[test]
    fn test_has_image_markers() {
        // Positive cases must actually contain the `![` opener.
        assert!(has_image_markers("![alt text](image.png)"));
        assert!(has_image_markers("![](image.png)"));
        assert!(!has_image_markers("[link](url)"));
        assert!(!has_image_markers("no images"));
    }

    #[test]
    fn test_contains_url() {
        assert!(contains_url("http://example.com"));
        assert!(contains_url("Text with https://example.com link"));
        assert!(contains_url("ftp://example.com"));
        assert!(!contains_url("Text without URL"));
        assert!(!contains_url("http not followed by ://"));

        // Edge cases.
        assert!(!contains_url("http"));
        assert!(!contains_url("https"));
        assert!(!contains_url("://"));
        assert!(contains_url("Visit http://site.com now"));
        assert!(contains_url("See https://secure.site.com/path"));
    }

    #[test]
    fn test_contains_url_performance() {
        // Long text without any URL takes the early-exit path.
        let long_text = "a".repeat(10000);
        assert!(!contains_url(&long_text));

        // Long text with a URL at the very end is still detected.
        let text_with_url = format!("{long_text}https://example.com");
        assert!(contains_url(&text_with_url));
    }

    #[test]
    fn test_escape_regex() {
        assert_eq!(escape_regex("a.b"), "a\\.b");
        assert_eq!(escape_regex("a+b*c"), "a\\+b\\*c");
        assert_eq!(escape_regex("(test)"), "\\(test\\)");
        assert_eq!(escape_regex("[a-z]"), "\\[a-z\\]");
        assert_eq!(escape_regex("normal text"), "normal text");

        // Every special character at once.
        assert_eq!(escape_regex(".$^{[(|)*+?\\"), "\\.\\$\\^\\{\\[\\(\\|\\)\\*\\+\\?\\\\");

        // Empty input.
        assert_eq!(escape_regex(""), "");

        // Mixed special and ordinary characters.
        assert_eq!(escape_regex("test.com/path?query=1"), "test\\.com/path\\?query=1");
    }

    #[test]
    fn test_static_regex_patterns() {
        // URLs
        assert!(URL_REGEX.is_match("https://example.com"));
        assert!(URL_REGEX.is_match("http://test.org/path"));
        assert!(URL_REGEX.is_match("ftp://files.com"));
        assert!(!URL_REGEX.is_match("not a url"));

        // Headings
        assert!(ATX_HEADING_REGEX.is_match("# Heading"));
        assert!(ATX_HEADING_REGEX.is_match(" ## Indented"));
        assert!(ATX_HEADING_REGEX.is_match("### "));
        assert!(!ATX_HEADING_REGEX.is_match("Not a heading"));

        // List markers
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("* Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("- Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("+ Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("1. Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("99. Item"));

        // Code fences
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```rust"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("~~~"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("~~~"));

        // Bold and italic
        assert!(BOLD_ASTERISK_REGEX.is_match("**bold**"));
        assert!(BOLD_UNDERSCORE_REGEX.is_match("__bold__"));
        assert!(ITALIC_ASTERISK_REGEX.is_match("*italic*"));
        assert!(ITALIC_UNDERSCORE_REGEX.is_match("_italic_"));

        // HTML tags
        assert!(HTML_TAG_REGEX.is_match("<div>"));
        assert!(HTML_TAG_REGEX.is_match("<span class='test'>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<br/>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<img src='test'/>"));

        // Whitespace
        assert!(TRAILING_WHITESPACE_REGEX.is_match("line with spaces "));
        assert!(TRAILING_WHITESPACE_REGEX.is_match("tabs\t\t"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n\n"));

        // Blockquotes
        assert!(BLOCKQUOTE_PREFIX_RE.is_match("> Quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(" > Indented quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(">> Nested"));
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        // Each thread requests a distinct pattern through the global cache.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                thread::spawn(move || {
                    let pattern = format!(r"\d{{{i}}}");
                    let regex = get_cached_regex(&pattern).unwrap();
                    assert!(regex.is_match(&"1".repeat(i)));
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }
    }
}