rumdl_lib/utils/
regex_cache.rs1use fancy_regex::Regex as FancyRegex;
23use regex::Regex;
24use std::collections::HashMap;
25use std::sync::LazyLock;
26use std::sync::{Arc, Mutex};
27
28#[derive(Debug)]
30pub struct RegexCache {
31 cache: HashMap<String, Arc<Regex>>,
32 fancy_cache: HashMap<String, Arc<FancyRegex>>,
33 usage_stats: HashMap<String, u64>,
34}
35
36impl Default for RegexCache {
37 fn default() -> Self {
38 Self::new()
39 }
40}
41
42impl RegexCache {
43 pub fn new() -> Self {
44 Self {
45 cache: HashMap::new(),
46 fancy_cache: HashMap::new(),
47 usage_stats: HashMap::new(),
48 }
49 }
50
51 pub fn get_regex(&mut self, pattern: &str) -> Result<Arc<Regex>, regex::Error> {
53 if let Some(regex) = self.cache.get(pattern) {
54 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
55 return Ok(regex.clone());
56 }
57
58 let regex = Arc::new(Regex::new(pattern)?);
59 self.cache.insert(pattern.to_string(), regex.clone());
60 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
61 Ok(regex)
62 }
63
64 pub fn get_fancy_regex(&mut self, pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
66 if let Some(regex) = self.fancy_cache.get(pattern) {
67 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
68 return Ok(regex.clone());
69 }
70
71 match FancyRegex::new(pattern) {
72 Ok(regex) => {
73 let arc_regex = Arc::new(regex);
74 self.fancy_cache.insert(pattern.to_string(), arc_regex.clone());
75 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
76 Ok(arc_regex)
77 }
78 Err(e) => Err(Box::new(e)),
79 }
80 }
81
82 pub fn get_stats(&self) -> HashMap<String, u64> {
84 self.usage_stats.clone()
85 }
86
87 pub fn clear(&mut self) {
89 self.cache.clear();
90 self.fancy_cache.clear();
91 self.usage_stats.clear();
92 }
93}
94
95static GLOBAL_REGEX_CACHE: LazyLock<Arc<Mutex<RegexCache>>> = LazyLock::new(|| Arc::new(Mutex::new(RegexCache::new())));
97
98pub fn get_cached_regex(pattern: &str) -> Result<Arc<Regex>, regex::Error> {
100 let mut cache = GLOBAL_REGEX_CACHE.lock().expect("Regex cache mutex poisoned");
101 cache.get_regex(pattern)
102}
103
104pub fn get_cached_fancy_regex(pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
106 let mut cache = GLOBAL_REGEX_CACHE.lock().expect("Regex cache mutex poisoned");
107 cache.get_fancy_regex(pattern)
108}
109
110pub fn get_cache_stats() -> HashMap<String, u64> {
112 let cache = GLOBAL_REGEX_CACHE.lock().expect("Regex cache mutex poisoned");
113 cache.get_stats()
114}
115
/// Expands to a reference to a `regex::Regex` stored in a `static` local to the call site.
///
/// The pattern is compiled on first use of that call site and reused afterwards. `LazyLock` is
/// named by its full path because paths inside `macro_rules!` are resolved where the macro is
/// expanded; an unqualified `LazyLock` would only compile in modules that import it themselves.
///
/// # Panics
/// Panics on first use if `$pattern` is not a valid regex.
#[macro_export]
macro_rules! regex_lazy {
    ($pattern:expr) => {{
        static REGEX: ::std::sync::LazyLock<regex::Regex> =
            ::std::sync::LazyLock::new(|| regex::Regex::new($pattern).unwrap());
        &*REGEX
    }};
}
140
/// Fetches `$pattern` from the global regex cache via `get_cached_regex`.
///
/// # Panics
/// Panics with "Failed to compile regex" if the lookup returns an error.
#[macro_export]
macro_rules! regex_cached {
    ($pattern:expr) => {{
        $crate::utils::regex_cache::get_cached_regex($pattern).expect("Failed to compile regex")
    }};
}
151
/// Fetches `$pattern` from the global regex cache via `get_cached_fancy_regex`.
///
/// # Panics
/// Panics with "Failed to compile fancy regex" if the lookup returns an error.
#[macro_export]
macro_rules! fancy_regex_cached {
    ($pattern:expr) => {{
        $crate::utils::regex_cache::get_cached_fancy_regex($pattern)
            .expect("Failed to compile fancy regex")
    }};
}
162
163pub use crate::regex_lazy;
165
166pub static URL_REGEX: LazyLock<Regex> =
168 LazyLock::new(|| Regex::new(r#"(?:https?|ftp)://[^\s<>\[\]()'"]+[^\s<>\[\]()"'.,]"#).unwrap());
169pub static BARE_URL_REGEX: LazyLock<Regex> =
170 LazyLock::new(|| Regex::new(r"(?:https?|ftp)://[^\s<>]+[^\s<>.]").unwrap());
171pub static URL_PATTERN: LazyLock<Regex> =
172 LazyLock::new(|| Regex::new(r"((?:https?|ftp)://[^\s\)<>]+[^\s\)<>.,])").unwrap());
173
174pub static ATX_HEADING_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(#{1,6})(\s+|$)").unwrap());
176pub static CLOSED_ATX_HEADING_REGEX: LazyLock<Regex> =
177 LazyLock::new(|| Regex::new(r"^(\s*)(#{1,6})(\s+)(.*)(\s+)(#+)(\s*)$").unwrap());
178pub static SETEXT_HEADING_REGEX: LazyLock<Regex> =
179 LazyLock::new(|| Regex::new(r"^(\s*)[^\s]+.*\n(\s*)(=+|-+)\s*$").unwrap());
180pub static TRAILING_PUNCTUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[.,:);!?]$").unwrap());
181
182pub static ATX_HEADING_WITH_CAPTURE: LazyLock<Regex> =
184 LazyLock::new(|| Regex::new(r"^(#{1,6})\s+(.+?)(?:\s+#*\s*)?$").unwrap());
185pub static SETEXT_HEADING_WITH_CAPTURE: LazyLock<FancyRegex> =
186 LazyLock::new(|| FancyRegex::new(r"^([^\n]+)\n([=\-])\2+\s*$").unwrap());
187
188pub static UNORDERED_LIST_MARKER_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)([*+-])(\s+)").unwrap());
190pub static ORDERED_LIST_MARKER_REGEX: LazyLock<Regex> =
191 LazyLock::new(|| Regex::new(r"^(\s*)(\d+)([.)])(\s+)").unwrap());
192pub static LIST_MARKER_ANY_REGEX: LazyLock<Regex> =
193 LazyLock::new(|| Regex::new(r"^(\s*)(?:([*+-])|(\d+)[.)])(\s+)").unwrap());
194
195pub static FENCED_CODE_BLOCK_START_REGEX: LazyLock<Regex> =
197 LazyLock::new(|| Regex::new(r"^(\s*)(```|~~~)(.*)$").unwrap());
198pub static FENCED_CODE_BLOCK_END_REGEX: LazyLock<Regex> =
199 LazyLock::new(|| Regex::new(r"^(\s*)(```|~~~)(\s*)$").unwrap());
200pub static INDENTED_CODE_BLOCK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s{4,})(.*)$").unwrap());
201pub static CODE_FENCE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(`{3,}|~{3,})").unwrap());
202
203pub static EMPHASIS_REGEX: LazyLock<FancyRegex> =
205 LazyLock::new(|| FancyRegex::new(r"(\s|^)(\*{1,2}|_{1,2})(?=\S)(.+?)(?<=\S)(\2)(\s|$)").unwrap());
206pub static SPACE_IN_EMPHASIS_REGEX: LazyLock<FancyRegex> =
207 LazyLock::new(|| FancyRegex::new(r"(\*|_)(\s+)(.+?)(\s+)(\1)").unwrap());
208
209pub static ASTERISK_EMPHASIS: LazyLock<Regex> =
213 LazyLock::new(|| Regex::new(r"(?:^|[^*])\*(\s+[^*]+\s*|\s*[^*]+\s+)\*(?:[^*]|$)").unwrap());
214pub static UNDERSCORE_EMPHASIS: LazyLock<Regex> =
215 LazyLock::new(|| Regex::new(r"(?:^|[^_])_(\s+[^_]+\s*|\s*[^_]+\s+)_(?:[^_]|$)").unwrap());
216pub static DOUBLE_UNDERSCORE_EMPHASIS: LazyLock<Regex> =
217 LazyLock::new(|| Regex::new(r"(?:^|[^_])__(\s+[^_]+\s*|\s*[^_]+\s+)__(?:[^_]|$)").unwrap());
218pub static DOUBLE_ASTERISK_EMPHASIS: LazyLock<FancyRegex> =
219 LazyLock::new(|| FancyRegex::new(r"\*\*\s+([^*]+?)\s+\*\*").unwrap());
220pub static DOUBLE_ASTERISK_SPACE_START: LazyLock<FancyRegex> =
221 LazyLock::new(|| FancyRegex::new(r"\*\*\s+([^*]+?)\*\*").unwrap());
222pub static DOUBLE_ASTERISK_SPACE_END: LazyLock<FancyRegex> =
223 LazyLock::new(|| FancyRegex::new(r"\*\*([^*]+?)\s+\*\*").unwrap());
224
225pub static FENCED_CODE_BLOCK_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap());
227pub static FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```\s*$").unwrap());
228pub static ALTERNATE_FENCED_CODE_BLOCK_START: LazyLock<Regex> =
229 LazyLock::new(|| Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap());
230pub static ALTERNATE_FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)~~~\s*$").unwrap());
231pub static INDENTED_CODE_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s{4,})").unwrap());
232
233pub static HTML_TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<([a-zA-Z][^>]*)>").unwrap());
235pub static HTML_SELF_CLOSING_TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<([a-zA-Z][^>]*/)>").unwrap());
236pub static HTML_TAG_FINDER: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i)</?[a-zA-Z][^>]*>").unwrap());
237pub static HTML_OPENING_TAG_FINDER: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i)<[a-zA-Z][^>]*>").unwrap());
238pub static HTML_TAG_QUICK_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i)</?[a-zA-Z]").unwrap());
239
240pub static LINK_REFERENCE_DEFINITION_REGEX: LazyLock<Regex> =
242 LazyLock::new(|| Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap());
243pub static INLINE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
244pub static LINK_TEXT_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]*)\]").unwrap());
245pub static LINK_REGEX: LazyLock<FancyRegex> =
246 LazyLock::new(|| FancyRegex::new(r"(?<!\\)\[([^\]]*)\]\(([^)#]*)#([^)]+)\)").unwrap());
247pub static EXTERNAL_URL_REGEX: LazyLock<FancyRegex> =
248 LazyLock::new(|| FancyRegex::new(r"^(https?://|ftp://|www\.|[^/]+\.[a-z]{2,})").unwrap());
249
250pub static IMAGE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap());
252
253pub static TRAILING_WHITESPACE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+$").unwrap());
255pub static MULTIPLE_BLANK_LINES_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\n{3,}").unwrap());
256
257pub static FRONT_MATTER_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^---\n.*?\n---\n").unwrap());
259
260pub static INLINE_CODE_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"`[^`]+`").unwrap());
262pub static BOLD_ASTERISK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").unwrap());
263pub static BOLD_UNDERSCORE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__(.+?)__").unwrap());
264pub static ITALIC_ASTERISK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+?)\*").unwrap());
265pub static ITALIC_UNDERSCORE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"_([^_]+?)_").unwrap());
266pub static LINK_TEXT_FULL_REGEX: LazyLock<FancyRegex> =
267 LazyLock::new(|| FancyRegex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap());
268pub static STRIKETHROUGH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"~~(.+?)~~").unwrap());
269pub static MULTIPLE_HYPHENS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"-{2,}").unwrap());
270pub static TOC_SECTION_START: LazyLock<Regex> =
271 LazyLock::new(|| Regex::new(r"^#+\s*(?:Table of Contents|Contents|TOC)\s*$").unwrap());
272
273pub static BLOCKQUOTE_PREFIX_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
275
276pub static IMAGE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^!\[.*?\]\[.*?\]$").unwrap());
278pub static LINK_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\[.*?\]:\s*https?://\S+$").unwrap());
279pub static URL_IN_TEXT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"https?://\S+").unwrap());
280pub static SENTENCE_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[.!?]\s+[A-Z]").unwrap());
281pub static ABBREVIATION: LazyLock<Regex> = LazyLock::new(|| {
282 Regex::new(r"\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|Inc|Corp|Ltd|Co|St|Ave|Blvd|Rd|Ph\.D|M\.D|B\.A|M\.A|Ph\.D|U\.S|U\.K|U\.N|N\.Y|L\.A|D\.C)\.\s+[A-Z]").unwrap()
283});
284pub static DECIMAL_NUMBER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\d+\.\s*\d+").unwrap());
285pub static LIST_ITEM: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\d+\.\s+").unwrap());
286pub static REFERENCE_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap());
287
288pub static EMAIL_PATTERN: LazyLock<Regex> =
290 LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
291
292pub static REF_LINK_REGEX: LazyLock<FancyRegex> =
296 LazyLock::new(|| FancyRegex::new(r"(?<!\\)\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap());
297
298pub static SHORTCUT_REF_REGEX: LazyLock<FancyRegex> =
303 LazyLock::new(|| FancyRegex::new(r"(?<![\\)\]])\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\](?!\s*[\[\(])").unwrap());
304
305pub static INLINE_LINK_FANCY_REGEX: LazyLock<FancyRegex> =
307 LazyLock::new(|| FancyRegex::new(r"(?<!\\)\[([^\]]+)\]\(([^)]+)\)").unwrap());
308
309pub static INLINE_IMAGE_FANCY_REGEX: LazyLock<FancyRegex> =
311 LazyLock::new(|| FancyRegex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap());
312
313pub static LINKED_IMAGE_INLINE_INLINE: LazyLock<FancyRegex> =
321 LazyLock::new(|| FancyRegex::new(r"\[!\[([^\]]*)\]\(([^)]+)\)\]\(([^)]+)\)").unwrap());
322
323pub static LINKED_IMAGE_REF_INLINE: LazyLock<FancyRegex> =
325 LazyLock::new(|| FancyRegex::new(r"\[!\[([^\]]*)\]\[([^\]]*)\]\]\(([^)]+)\)").unwrap());
326
327pub static LINKED_IMAGE_INLINE_REF: LazyLock<FancyRegex> =
329 LazyLock::new(|| FancyRegex::new(r"\[!\[([^\]]*)\]\(([^)]+)\)\]\[([^\]]*)\]").unwrap());
330
331pub static LINKED_IMAGE_REF_REF: LazyLock<FancyRegex> =
333 LazyLock::new(|| FancyRegex::new(r"\[!\[([^\]]*)\]\[([^\]]*)\]\]\[([^\]]*)\]").unwrap());
334
335pub static REF_IMAGE_REGEX: LazyLock<FancyRegex> =
337 LazyLock::new(|| FancyRegex::new(r"!\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap());
338
339pub static FOOTNOTE_REF_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"\[\^([^\]]+)\]").unwrap());
341
342pub static STRIKETHROUGH_FANCY_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"~~([^~]+)~~").unwrap());
344
345pub static WIKI_LINK_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"\[\[([^\]]+)\]\]").unwrap());
347
348pub static INLINE_MATH_REGEX: LazyLock<FancyRegex> =
350 LazyLock::new(|| FancyRegex::new(r"(?<!\$)\$(?!\$)([^\$]+)\$(?!\$)").unwrap());
351pub static DISPLAY_MATH_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"\$\$([^\$]+)\$\$").unwrap());
352
353pub static EMOJI_SHORTCODE_REGEX: LazyLock<FancyRegex> =
355 LazyLock::new(|| FancyRegex::new(r":([a-zA-Z0-9_+-]+):").unwrap());
356
357pub static HTML_TAG_PATTERN: LazyLock<FancyRegex> =
359 LazyLock::new(|| FancyRegex::new(r"</?[a-zA-Z][^>]*>|<[a-zA-Z][^>]*/\s*>").unwrap());
360
361pub static HTML_ENTITY_REGEX: LazyLock<FancyRegex> =
363 LazyLock::new(|| FancyRegex::new(r"&[a-zA-Z][a-zA-Z0-9]*;|&#\d+;|&#x[0-9a-fA-F]+;").unwrap());
364
365pub static HTML_COMMENT_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<!--").unwrap());
368pub static HTML_COMMENT_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"-->").unwrap());
369pub static HTML_COMMENT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<!--[\s\S]*?-->").unwrap());
370
371pub static HTML_HEADING_PATTERN: LazyLock<FancyRegex> =
373 LazyLock::new(|| FancyRegex::new(r"^\s*<h([1-6])(?:\s[^>]*)?>.*</h\1>\s*$").unwrap());
374
375pub static HEADING_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?m)^(?:\s*)#").unwrap());
377
378pub static HR_DASH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\-{3,}\s*$").unwrap());
380pub static HR_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\*{3,}\s*$").unwrap());
381pub static HR_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^_{3,}\s*$").unwrap());
382pub static HR_SPACED_DASH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\-\s+){2,}\-\s*$").unwrap());
383pub static HR_SPACED_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\*\s+){2,}\*\s*$").unwrap());
384pub static HR_SPACED_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(_\s+){2,}_\s*$").unwrap());
385
/// Cheap pre-check: returns `true` when `content` contains a `#` anywhere.
pub fn has_heading_markers(content: &str) -> bool {
    content.chars().any(|ch| ch == '#')
}
391
/// Cheap pre-check for list syntax.
///
/// Returns `true` when `content` contains `*`, `-` or `+`, or when it contains both a `.` and an
/// ASCII digit (in any positions).
pub fn has_list_markers(content: &str) -> bool {
    let has_bullet = content.chars().any(|ch| matches!(ch, '*' | '-' | '+'));
    if has_bullet {
        return true;
    }

    let has_dot = content.contains('.');
    has_dot && content.chars().any(|ch| ch.is_ascii_digit())
}
399
/// Cheap pre-check for code blocks: returns `true` when `content` contains a triple-backtick
/// fence, a `~~~` fence, or a newline immediately followed by a space.
pub fn has_code_block_markers(content: &str) -> bool {
    ["```", "~~~", "\n "]
        .iter()
        .any(|marker| content.contains(*marker))
}
405
/// Cheap pre-check: returns `true` when `content` contains `*` or `_`.
pub fn has_emphasis_markers(content: &str) -> bool {
    content.chars().any(|ch| matches!(ch, '*' | '_'))
}
410
/// Cheap pre-check: returns `true` when `content` contains both `<` and `>` (in any order).
///
/// A separate test for `"/>"` is unnecessary: any string containing `"/>"` also contains `'>'`.
pub fn has_html_tags(content: &str) -> bool {
    content.contains('<') && content.contains('>')
}
415
/// Cheap pre-check for links: returns `true` when `content` contains both `[` and `]`, or any
/// of the scheme prefixes `http://`, `https://` or `ftp://`.
pub fn has_link_markers(content: &str) -> bool {
    let has_brackets = content.contains('[') && content.contains(']');
    if has_brackets {
        return true;
    }

    ["http://", "https://", "ftp://"]
        .iter()
        .any(|scheme| content.contains(*scheme))
}
423
/// Cheap pre-check: returns `true` when `content` contains the image opener `![`.
pub fn has_image_markers(content: &str) -> bool {
    content.find("![").is_some()
}
428
/// Heuristic URL detector that avoids running a regex.
///
/// Returns `true` when some occurrence of the characters `htt` or `ftp` is followed by `://`
/// starting at most ten characters after the start of that occurrence. Offsets are counted in
/// `char`s, not bytes.
pub fn contains_url(content: &str) -> bool {
    // Without a scheme separator there is nothing to find.
    if !content.contains("://") {
        return false;
    }

    let chars: Vec<char> = content.chars().collect();

    chars.windows(3).enumerate().any(|(start, prefix)| {
        if !matches!(prefix, ['h', 't', 't'] | ['f', 't', 'p']) {
            return false;
        }

        // Inspect the eleven three-character windows beginning at offsets start..=start + 10.
        chars[start..]
            .windows(3)
            .take(11)
            .any(|window| matches!(window, [':', '/', '/']))
    })
}
465
/// Returns `s` with a backslash inserted before each of the characters
/// `. + * ? ^ $ ( ) [ ] { } | \`; every other character is copied unchanged.
pub fn escape_regex(s: &str) -> String {
    // Worst case every character needs an escape, doubling the length.
    let mut escaped = String::with_capacity(s.len() * 2);

    for ch in s.chars() {
        let needs_escape = matches!(
            ch,
            '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '\\'
        );
        if needs_escape {
            escaped.push('\\');
        }
        escaped.push(ch);
    }

    escaped
}
480
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_regex_cache_new() {
        let cache = RegexCache::new();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_regex_cache_default() {
        let cache = RegexCache::default();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_get_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles the pattern and counts one use.
        let regex1 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&1));

        // Second request is served from the cache and counts a second use.
        let regex2 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_regex(r"[unterminated");
        assert!(result.is_err());
        assert!(cache.cache.is_empty());
    }

    #[test]
    fn test_get_fancy_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles the pattern and counts one use.
        let regex1 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&1));

        // Second request is served from the cache and counts a second use.
        let regex2 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_fancy_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_fancy_regex(r"(?<=invalid");
        assert!(result.is_err());
        assert!(cache.fancy_cache.is_empty());
    }

    #[test]
    fn test_get_stats() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\w+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        let stats = cache.get_stats();
        assert_eq!(stats.get(r"\d+"), Some(&2));
        assert_eq!(stats.get(r"\w+"), Some(&1));
        assert_eq!(stats.get(r"(?<=foo)bar"), Some(&1));
    }

    #[test]
    fn test_clear_cache() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        assert!(!cache.cache.is_empty());
        assert!(!cache.fancy_cache.is_empty());
        assert!(!cache.usage_stats.is_empty());

        cache.clear();

        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_global_cache_functions() {
        let regex1 = get_cached_regex(r"\d{3}").unwrap();
        let regex2 = get_cached_regex(r"\d{3}").unwrap();
        assert!(Arc::ptr_eq(&regex1, &regex2));

        let fancy1 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        let fancy2 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        assert!(Arc::ptr_eq(&fancy1, &fancy2));

        let stats = get_cache_stats();
        assert!(stats.contains_key(r"\d{3}"));
        assert!(stats.contains_key(r"(?<=test)ing"));
    }

    #[test]
    fn test_regex_lazy_macro() {
        let re = regex_lazy!(r"^test.*end$");
        assert!(re.is_match("test something end"));
        assert!(!re.is_match("test something"));

        // A second call site gets its own independent static.
        let re2 = regex_lazy!(r"^start.*finish$");
        assert!(re2.is_match("start and finish"));
        assert!(!re2.is_match("start without end"));
    }

    #[test]
    fn test_has_heading_markers() {
        assert!(has_heading_markers("# Heading"));
        assert!(has_heading_markers("Text with # symbol"));
        assert!(!has_heading_markers("Text without heading marker"));
    }

    #[test]
    fn test_has_list_markers() {
        assert!(has_list_markers("* Item"));
        assert!(has_list_markers("- Item"));
        assert!(has_list_markers("+ Item"));
        assert!(has_list_markers("1. Item"));
        assert!(!has_list_markers("Text without list markers"));
    }

    #[test]
    fn test_has_code_block_markers() {
        assert!(has_code_block_markers("```code```"));
        assert!(has_code_block_markers("~~~code~~~"));
        assert!(has_code_block_markers("Text\n indented code"));
        assert!(!has_code_block_markers("Text without code blocks"));
    }

    #[test]
    fn test_has_emphasis_markers() {
        assert!(has_emphasis_markers("*emphasis*"));
        assert!(has_emphasis_markers("_emphasis_"));
        assert!(has_emphasis_markers("**bold**"));
        assert!(has_emphasis_markers("__bold__"));
        assert!(!has_emphasis_markers("no emphasis"));
    }

    #[test]
    fn test_has_html_tags() {
        assert!(has_html_tags("<div>content</div>"));
        assert!(has_html_tags("<br/>"));
        assert!(has_html_tags("<img src='test.jpg'>"));
        assert!(!has_html_tags("no html tags"));
        assert!(!has_html_tags("less than < but no tag"));
    }

    #[test]
    fn test_has_link_markers() {
        assert!(has_link_markers("[text](url)"));
        assert!(has_link_markers("[reference][1]"));
        assert!(has_link_markers("http://example.com"));
        assert!(has_link_markers("https://example.com"));
        assert!(has_link_markers("ftp://example.com"));
        assert!(!has_link_markers("no links here"));
    }

    #[test]
    fn test_has_image_markers() {
        // Positive cases must contain the `![` opener that the helper looks for.
        assert!(has_image_markers("![alt text](image.png)"));
        assert!(has_image_markers("![](image.png)"));
        assert!(!has_image_markers("[link](url)"));
        assert!(!has_image_markers("no images"));
    }

    #[test]
    fn test_contains_url() {
        assert!(contains_url("http://example.com"));
        assert!(contains_url("Text with https://example.com link"));
        assert!(contains_url("ftp://example.com"));
        assert!(!contains_url("Text without URL"));
        assert!(!contains_url("http not followed by ://"));

        // Edge cases: scheme or separator on their own are not URLs.
        assert!(!contains_url("http"));
        assert!(!contains_url("https"));
        assert!(!contains_url("://"));
        assert!(contains_url("Visit http://site.com now"));
        assert!(contains_url("See https://secure.site.com/path"));
    }

    #[test]
    fn test_contains_url_performance() {
        // Long input without a separator takes the early-exit path.
        let long_text = "a".repeat(10000);
        assert!(!contains_url(&long_text));

        // A URL at the very end of long input is still found.
        let text_with_url = format!("{long_text}https://example.com");
        assert!(contains_url(&text_with_url));
    }

    #[test]
    fn test_escape_regex() {
        assert_eq!(escape_regex("a.b"), "a\\.b");
        assert_eq!(escape_regex("a+b*c"), "a\\+b\\*c");
        assert_eq!(escape_regex("(test)"), "\\(test\\)");
        assert_eq!(escape_regex("[a-z]"), "\\[a-z\\]");
        assert_eq!(escape_regex("normal text"), "normal text");

        // Every escaped character at once.
        assert_eq!(escape_regex(".$^{[(|)*+?\\"), "\\.\\$\\^\\{\\[\\(\\|\\)\\*\\+\\?\\\\");

        // Empty input stays empty.
        assert_eq!(escape_regex(""), "");

        // Mixed content: only the special characters gain a backslash.
        assert_eq!(escape_regex("test.com/path?query=1"), "test\\.com/path\\?query=1");
    }

    #[test]
    fn test_static_regex_patterns() {
        // URLs
        assert!(URL_REGEX.is_match("https://example.com"));
        assert!(URL_REGEX.is_match("http://test.org/path"));
        assert!(URL_REGEX.is_match("ftp://files.com"));
        assert!(!URL_REGEX.is_match("not a url"));

        // Headings
        assert!(ATX_HEADING_REGEX.is_match("# Heading"));
        assert!(ATX_HEADING_REGEX.is_match(" ## Indented"));
        assert!(ATX_HEADING_REGEX.is_match("### "));
        assert!(!ATX_HEADING_REGEX.is_match("Not a heading"));

        // List markers
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("* Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("- Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("+ Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("1. Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("99. Item"));

        // Code fences
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```rust"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("~~~"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("~~~"));

        // Emphasis
        assert!(BOLD_ASTERISK_REGEX.is_match("**bold**"));
        assert!(BOLD_UNDERSCORE_REGEX.is_match("__bold__"));
        assert!(ITALIC_ASTERISK_REGEX.is_match("*italic*"));
        assert!(ITALIC_UNDERSCORE_REGEX.is_match("_italic_"));

        // HTML
        assert!(HTML_TAG_REGEX.is_match("<div>"));
        assert!(HTML_TAG_REGEX.is_match("<span class='test'>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<br/>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<img src='test'/>"));

        // Whitespace
        assert!(TRAILING_WHITESPACE_REGEX.is_match("line with spaces "));
        assert!(TRAILING_WHITESPACE_REGEX.is_match("tabs\t\t"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n\n"));

        // Blockquotes
        assert!(BLOCKQUOTE_PREFIX_RE.is_match("> Quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(" > Indented quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(">> Nested"));
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        // Ten threads hit the global cache concurrently, each with its own pattern.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                thread::spawn(move || {
                    let pattern = format!(r"\d{{{i}}}");
                    let regex = get_cached_regex(&pattern).unwrap();
                    assert!(regex.is_match(&"1".repeat(i)));
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }
    }
}