rumdl_lib/utils/
regex_cache.rs1use fancy_regex::Regex as FancyRegex;
23use regex::Regex;
24use std::collections::HashMap;
25use std::sync::LazyLock;
26use std::sync::{Arc, Mutex};
27
28#[derive(Debug)]
30pub struct RegexCache {
31 cache: HashMap<String, Arc<Regex>>,
32 fancy_cache: HashMap<String, Arc<FancyRegex>>,
33 usage_stats: HashMap<String, u64>,
34}
35
36impl Default for RegexCache {
37 fn default() -> Self {
38 Self::new()
39 }
40}
41
42impl RegexCache {
43 pub fn new() -> Self {
44 Self {
45 cache: HashMap::new(),
46 fancy_cache: HashMap::new(),
47 usage_stats: HashMap::new(),
48 }
49 }
50
51 pub fn get_regex(&mut self, pattern: &str) -> Result<Arc<Regex>, regex::Error> {
53 if let Some(regex) = self.cache.get(pattern) {
54 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
55 return Ok(regex.clone());
56 }
57
58 let regex = Arc::new(Regex::new(pattern)?);
59 self.cache.insert(pattern.to_string(), regex.clone());
60 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
61 Ok(regex)
62 }
63
64 pub fn get_fancy_regex(&mut self, pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
66 if let Some(regex) = self.fancy_cache.get(pattern) {
67 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
68 return Ok(regex.clone());
69 }
70
71 match FancyRegex::new(pattern) {
72 Ok(regex) => {
73 let arc_regex = Arc::new(regex);
74 self.fancy_cache.insert(pattern.to_string(), arc_regex.clone());
75 *self.usage_stats.entry(pattern.to_string()).or_insert(0) += 1;
76 Ok(arc_regex)
77 }
78 Err(e) => Err(Box::new(e)),
79 }
80 }
81
82 pub fn get_stats(&self) -> HashMap<String, u64> {
84 self.usage_stats.clone()
85 }
86
87 pub fn clear(&mut self) {
89 self.cache.clear();
90 self.fancy_cache.clear();
91 self.usage_stats.clear();
92 }
93}
94
95static GLOBAL_REGEX_CACHE: LazyLock<Arc<Mutex<RegexCache>>> = LazyLock::new(|| Arc::new(Mutex::new(RegexCache::new())));
97
98pub fn get_cached_regex(pattern: &str) -> Result<Arc<Regex>, regex::Error> {
100 let mut cache = GLOBAL_REGEX_CACHE.lock().expect("Regex cache mutex poisoned");
101 cache.get_regex(pattern)
102}
103
104pub fn get_cached_fancy_regex(pattern: &str) -> Result<Arc<FancyRegex>, Box<fancy_regex::Error>> {
106 let mut cache = GLOBAL_REGEX_CACHE.lock().expect("Regex cache mutex poisoned");
107 cache.get_fancy_regex(pattern)
108}
109
110pub fn get_cache_stats() -> HashMap<String, u64> {
112 let cache = GLOBAL_REGEX_CACHE.lock().expect("Regex cache mutex poisoned");
113 cache.get_stats()
114}
115
/// Expands to a reference to a lazily compiled `regex::Regex` stored in a
/// `static` that is private to the expansion site.
///
/// `LazyLock` is named by its absolute path because `macro_rules!` resolves
/// paths at the call site; callers therefore do not need to import it.
///
/// # Panics
///
/// The first evaluation panics if `$pattern` is not a valid regex.
#[macro_export]
macro_rules! regex_lazy {
    ($pattern:expr) => {{
        static REGEX: ::std::sync::LazyLock<regex::Regex> =
            ::std::sync::LazyLock::new(|| regex::Regex::new($pattern).unwrap());
        &*REGEX
    }};
}
140
/// Expands to a call to `get_cached_regex` for `$pattern`, yielding the shared
/// compiled regex from the global cache.
///
/// # Panics
///
/// Panics with "Failed to compile regex" when the lookup returns an error.
#[macro_export]
macro_rules! regex_cached {
    ($pattern:expr) => {{
        $crate::utils::regex_cache::get_cached_regex($pattern).expect("Failed to compile regex")
    }};
}
151
/// Expands to a call to `get_cached_fancy_regex` for `$pattern`, yielding the
/// shared compiled fancy regex from the global cache.
///
/// # Panics
///
/// Panics with "Failed to compile fancy regex" when the lookup returns an error.
#[macro_export]
macro_rules! fancy_regex_cached {
    ($pattern:expr) => {{
        $crate::utils::regex_cache::get_cached_fancy_regex($pattern)
            .expect("Failed to compile fancy regex")
    }};
}
162
163pub use crate::regex_lazy;
165
166pub static URL_REGEX: LazyLock<Regex> =
168 LazyLock::new(|| Regex::new(r#"(?:https?|ftp)://[^\s<>\[\]()'"]+[^\s<>\[\]()"'.,]"#).unwrap());
169pub static BARE_URL_REGEX: LazyLock<Regex> =
170 LazyLock::new(|| Regex::new(r"(?:https?|ftp)://[^\s<>]+[^\s<>.]").unwrap());
171pub static URL_PATTERN: LazyLock<Regex> =
172 LazyLock::new(|| Regex::new(r"((?:https?|ftp)://[^\s\)<>]+[^\s\)<>.,])").unwrap());
173
174pub static ATX_HEADING_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)(#{1,6})(\s+|$)").unwrap());
176pub static CLOSED_ATX_HEADING_REGEX: LazyLock<Regex> =
177 LazyLock::new(|| Regex::new(r"^(\s*)(#{1,6})(\s+)(.*)(\s+)(#+)(\s*)$").unwrap());
178pub static SETEXT_HEADING_REGEX: LazyLock<Regex> =
179 LazyLock::new(|| Regex::new(r"^(\s*)[^\s]+.*\n(\s*)(=+|-+)\s*$").unwrap());
180pub static TRAILING_PUNCTUATION_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[.,:);!?]$").unwrap());
181
182pub static ATX_HEADING_WITH_CAPTURE: LazyLock<Regex> =
184 LazyLock::new(|| Regex::new(r"^(#{1,6})\s+(.+?)(?:\s+#*\s*)?$").unwrap());
185pub static SETEXT_HEADING_WITH_CAPTURE: LazyLock<FancyRegex> =
186 LazyLock::new(|| FancyRegex::new(r"^([^\n]+)\n([=\-])\2+\s*$").unwrap());
187
188pub static UNORDERED_LIST_MARKER_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)([*+-])(\s+)").unwrap());
190pub static ORDERED_LIST_MARKER_REGEX: LazyLock<Regex> =
191 LazyLock::new(|| Regex::new(r"^(\s*)(\d+)([.)])(\s+)").unwrap());
192pub static LIST_MARKER_ANY_REGEX: LazyLock<Regex> =
193 LazyLock::new(|| Regex::new(r"^(\s*)(?:([*+-])|(\d+)[.)])(\s+)").unwrap());
194
195pub static FENCED_CODE_BLOCK_START_REGEX: LazyLock<Regex> =
197 LazyLock::new(|| Regex::new(r"^(\s*)(```|~~~)(.*)$").unwrap());
198pub static FENCED_CODE_BLOCK_END_REGEX: LazyLock<Regex> =
199 LazyLock::new(|| Regex::new(r"^(\s*)(```|~~~)(\s*)$").unwrap());
200pub static INDENTED_CODE_BLOCK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s{4,})(.*)$").unwrap());
201pub static CODE_FENCE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(`{3,}|~{3,})").unwrap());
202
203pub static EMPHASIS_REGEX: LazyLock<FancyRegex> =
205 LazyLock::new(|| FancyRegex::new(r"(\s|^)(\*{1,2}|_{1,2})(?=\S)(.+?)(?<=\S)(\2)(\s|$)").unwrap());
206pub static SPACE_IN_EMPHASIS_REGEX: LazyLock<FancyRegex> =
207 LazyLock::new(|| FancyRegex::new(r"(\*|_)(\s+)(.+?)(\s+)(\1)").unwrap());
208
209pub static ASTERISK_EMPHASIS: LazyLock<Regex> =
213 LazyLock::new(|| Regex::new(r"(?:^|[^*])\*(\s+[^*]+\s*|\s*[^*]+\s+)\*(?:[^*]|$)").unwrap());
214pub static UNDERSCORE_EMPHASIS: LazyLock<Regex> =
215 LazyLock::new(|| Regex::new(r"(?:^|[^_])_(\s+[^_]+\s*|\s*[^_]+\s+)_(?:[^_]|$)").unwrap());
216pub static DOUBLE_UNDERSCORE_EMPHASIS: LazyLock<Regex> =
217 LazyLock::new(|| Regex::new(r"(?:^|[^_])__(\s+[^_]+\s*|\s*[^_]+\s+)__(?:[^_]|$)").unwrap());
218pub static DOUBLE_ASTERISK_EMPHASIS: LazyLock<FancyRegex> =
219 LazyLock::new(|| FancyRegex::new(r"\*\*\s+([^*]+?)\s+\*\*").unwrap());
220pub static DOUBLE_ASTERISK_SPACE_START: LazyLock<FancyRegex> =
221 LazyLock::new(|| FancyRegex::new(r"\*\*\s+([^*]+?)\*\*").unwrap());
222pub static DOUBLE_ASTERISK_SPACE_END: LazyLock<FancyRegex> =
223 LazyLock::new(|| FancyRegex::new(r"\*\*([^*]+?)\s+\*\*").unwrap());
224
225pub static FENCED_CODE_BLOCK_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap());
227pub static FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```\s*$").unwrap());
228pub static ALTERNATE_FENCED_CODE_BLOCK_START: LazyLock<Regex> =
229 LazyLock::new(|| Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap());
230pub static ALTERNATE_FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)~~~\s*$").unwrap());
231pub static INDENTED_CODE_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s{4,})").unwrap());
232
233pub static HTML_TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<([a-zA-Z][^>]*)>").unwrap());
235pub static HTML_SELF_CLOSING_TAG_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<([a-zA-Z][^>]*/)>").unwrap());
236pub static HTML_TAG_FINDER: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i)</?[a-zA-Z][^>]*>").unwrap());
237pub static HTML_OPENING_TAG_FINDER: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i)<[a-zA-Z][^>]*>").unwrap());
238pub static HTML_TAG_QUICK_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new("(?i)</?[a-zA-Z]").unwrap());
239
240pub static LINK_REFERENCE_DEFINITION_REGEX: LazyLock<Regex> =
242 LazyLock::new(|| Regex::new(r"^\s*\[([^\]]+)\]:\s+(.+)$").unwrap());
243pub static INLINE_LINK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
244pub static LINK_TEXT_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]*)\]").unwrap());
245pub static LINK_REGEX: LazyLock<FancyRegex> =
246 LazyLock::new(|| FancyRegex::new(r"(?<!\\)\[([^\]]*)\]\(([^)#]*)#([^)]+)\)").unwrap());
247pub static EXTERNAL_URL_REGEX: LazyLock<FancyRegex> =
248 LazyLock::new(|| FancyRegex::new(r"^(https?://|ftp://|www\.|[^/]+\.[a-z]{2,})").unwrap());
249
250pub static IMAGE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap());
252
253pub static TRAILING_WHITESPACE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s+$").unwrap());
255pub static MULTIPLE_BLANK_LINES_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\n{3,}").unwrap());
256
257pub static FRONT_MATTER_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^---\n.*?\n---\n").unwrap());
259
260pub static INLINE_CODE_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"`[^`]+`").unwrap());
262pub static BOLD_ASTERISK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").unwrap());
263pub static BOLD_UNDERSCORE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__(.+?)__").unwrap());
264pub static ITALIC_ASTERISK_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+?)\*").unwrap());
265pub static ITALIC_UNDERSCORE_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"_([^_]+?)_").unwrap());
266pub static LINK_TEXT_FULL_REGEX: LazyLock<FancyRegex> =
267 LazyLock::new(|| FancyRegex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap());
268pub static STRIKETHROUGH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"~~(.+?)~~").unwrap());
269pub static MULTIPLE_HYPHENS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"-{2,}").unwrap());
270pub static TOC_SECTION_START: LazyLock<Regex> =
271 LazyLock::new(|| Regex::new(r"^#+\s*(?:Table of Contents|Contents|TOC)\s*$").unwrap());
272
273pub static BLOCKQUOTE_PREFIX_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
275
276pub static IMAGE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^!\[.*?\]\[.*?\]$").unwrap());
278pub static LINK_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\[.*?\]:\s*https?://\S+$").unwrap());
279pub static URL_IN_TEXT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"https?://\S+").unwrap());
280pub static SENTENCE_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"[.!?]\s+[A-Z]").unwrap());
281pub static ABBREVIATION: LazyLock<Regex> = LazyLock::new(|| {
282 Regex::new(r"\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|vs|etc|i\.e|e\.g|Inc|Corp|Ltd|Co|St|Ave|Blvd|Rd|Ph\.D|M\.D|B\.A|M\.A|Ph\.D|U\.S|U\.K|U\.N|N\.Y|L\.A|D\.C)\.\s+[A-Z]").unwrap()
283});
284pub static DECIMAL_NUMBER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\d+\.\s*\d+").unwrap());
285pub static LIST_ITEM: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\d+\.\s+").unwrap());
286pub static REFERENCE_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]*)\]\[([^\]]*)\]").unwrap());
287
288pub static EMAIL_PATTERN: LazyLock<Regex> =
290 LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
291
292pub static REF_LINK_REGEX: LazyLock<FancyRegex> =
296 LazyLock::new(|| FancyRegex::new(r"(?<!\\)\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap());
297
298pub static SHORTCUT_REF_REGEX: LazyLock<FancyRegex> =
303 LazyLock::new(|| FancyRegex::new(r"(?<![\\)\]])\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\](?!\s*[\[\(])").unwrap());
304
305pub static INLINE_LINK_FANCY_REGEX: LazyLock<FancyRegex> =
307 LazyLock::new(|| FancyRegex::new(r"(?<!\\)\[([^\]]+)\]\(([^)]+)\)").unwrap());
308
309pub static INLINE_IMAGE_FANCY_REGEX: LazyLock<FancyRegex> =
311 LazyLock::new(|| FancyRegex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap());
312
313pub static REF_IMAGE_REGEX: LazyLock<FancyRegex> =
315 LazyLock::new(|| FancyRegex::new(r"!\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]\[([^\]]*)\]").unwrap());
316
317pub static FOOTNOTE_REF_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"\[\^([^\]]+)\]").unwrap());
319
320pub static STRIKETHROUGH_FANCY_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"~~([^~]+)~~").unwrap());
322
323pub static WIKI_LINK_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"\[\[([^\]]+)\]\]").unwrap());
325
326pub static INLINE_MATH_REGEX: LazyLock<FancyRegex> =
328 LazyLock::new(|| FancyRegex::new(r"(?<!\$)\$(?!\$)([^\$]+)\$(?!\$)").unwrap());
329pub static DISPLAY_MATH_REGEX: LazyLock<FancyRegex> = LazyLock::new(|| FancyRegex::new(r"\$\$([^\$]+)\$\$").unwrap());
330
331pub static EMOJI_SHORTCODE_REGEX: LazyLock<FancyRegex> =
333 LazyLock::new(|| FancyRegex::new(r":([a-zA-Z0-9_+-]+):").unwrap());
334
335pub static HTML_TAG_PATTERN: LazyLock<FancyRegex> =
337 LazyLock::new(|| FancyRegex::new(r"</?[a-zA-Z][^>]*>|<[a-zA-Z][^>]*/\s*>").unwrap());
338
339pub static HTML_ENTITY_REGEX: LazyLock<FancyRegex> =
341 LazyLock::new(|| FancyRegex::new(r"&[a-zA-Z][a-zA-Z0-9]*;|&#\d+;|&#x[0-9a-fA-F]+;").unwrap());
342
343pub static HTML_COMMENT_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<!--").unwrap());
346pub static HTML_COMMENT_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"-->").unwrap());
347pub static HTML_COMMENT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"<!--[\s\S]*?-->").unwrap());
348
349pub static HTML_HEADING_PATTERN: LazyLock<FancyRegex> =
351 LazyLock::new(|| FancyRegex::new(r"^\s*<h([1-6])(?:\s[^>]*)?>.*</h\1>\s*$").unwrap());
352
353pub static HEADING_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?m)^(?:\s*)#").unwrap());
355
356pub static HR_DASH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\-{3,}\s*$").unwrap());
358pub static HR_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\*{3,}\s*$").unwrap());
359pub static HR_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^_{3,}\s*$").unwrap());
360pub static HR_SPACED_DASH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\-\s+){2,}\-\s*$").unwrap());
361pub static HR_SPACED_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\*\s+){2,}\*\s*$").unwrap());
362pub static HR_SPACED_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(_\s+){2,}_\s*$").unwrap());
363
/// Cheap pre-check: reports whether `content` contains a `#` anywhere.
pub fn has_heading_markers(content: &str) -> bool {
    // '#' is ASCII, so a byte scan finds exactly the same occurrences.
    content.bytes().any(|byte| byte == b'#')
}
369
/// Cheap pre-check for list syntax.
///
/// Returns `true` when `content` contains `*`, `-` or `+`, or when it contains
/// both a `.` and an ASCII digit (in any order, anywhere in the text).
pub fn has_list_markers(content: &str) -> bool {
    let has_bullet_char = content.contains(['*', '-', '+']);
    has_bullet_char || (content.contains('.') && content.chars().any(|ch| ch.is_ascii_digit()))
}
377
/// Cheap pre-check for code block syntax.
///
/// Returns `true` when `content` contains a triple-backtick fence, a
/// triple-tilde fence, or a newline immediately followed by a space.
pub fn has_code_block_markers(content: &str) -> bool {
    const MARKERS: [&str; 3] = ["```", "~~~", "\n "];
    MARKERS.iter().any(|marker| content.contains(*marker))
}
383
/// Cheap pre-check: reports whether `content` contains `*` or `_`.
pub fn has_emphasis_markers(content: &str) -> bool {
    content.chars().any(|ch| matches!(ch, '*' | '_'))
}
388
/// Cheap pre-check: reports whether `content` contains both `<` and `>`.
///
/// The characters may appear in any order; this is a filter, not a parser.
/// A separate test for `"/>"` is unnecessary because any text containing
/// `"/>"` already contains `'>'`.
pub fn has_html_tags(content: &str) -> bool {
    content.contains('<') && content.contains('>')
}
393
/// Cheap pre-check for link syntax.
///
/// Returns `true` when `content` contains both `[` and `]`, or one of the
/// literal prefixes `http://`, `https://` or `ftp://`.
pub fn has_link_markers(content: &str) -> bool {
    const SCHEMES: [&str; 3] = ["http://", "https://", "ftp://"];

    let has_brackets = content.contains('[') && content.contains(']');
    has_brackets || SCHEMES.iter().any(|scheme| content.contains(*scheme))
}
401
/// Cheap pre-check: reports whether `content` contains the sequence `![`.
pub fn has_image_markers(content: &str) -> bool {
    content.find("![").is_some()
}
406
/// Heuristic URL pre-check that avoids running a regex.
///
/// Returns `true` when `content` contains `htt` or `ftp` with `://` starting at
/// most ten characters after the start of that prefix. This accepts
/// `http://`, `https://` and `ftp://`, and also a few near-misses such as
/// `httpx://`, so treat a `true` result as "might contain a URL".
///
/// The scan works directly on the string slice, so the input is never copied
/// into an intermediate buffer.
pub fn contains_url(content: &str) -> bool {
    /// Characters examined from the start of a scheme prefix: eleven possible
    /// start offsets for `://` (0..=10) plus its two trailing characters.
    const SCAN_CHARS: usize = 13;

    /// Reports whether `://` occurs within the first `SCAN_CHARS` characters.
    fn separator_follows(candidate: &str) -> bool {
        let mut previous = ('\0', '\0');
        for ch in candidate.chars().take(SCAN_CHARS) {
            if previous == (':', '/') && ch == '/' {
                return true;
            }
            previous = (previous.1, ch);
        }
        false
    }

    // Cheap rejection: nothing below can succeed without a "://" somewhere.
    if !content.contains("://") {
        return false;
    }

    // Neither prefix can overlap itself, so `match_indices` reports every
    // occurrence. The window is counted in characters, not bytes.
    content
        .match_indices("htt")
        .chain(content.match_indices("ftp"))
        .any(|(start, _)| separator_follows(&content[start..]))
}
443
/// Returns `s` with a backslash inserted before each of the characters
/// `. + * ? ^ $ ( ) [ ] { } | \`.
///
/// All other characters are copied unchanged.
pub fn escape_regex(s: &str) -> String {
    // Worst case every character gains a backslash, hence twice the length.
    s.chars()
        .fold(String::with_capacity(s.len() * 2), |mut escaped, ch| {
            let needs_escape = matches!(
                ch,
                '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '\\'
            );
            if needs_escape {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
458
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_regex_cache_new() {
        let cache = RegexCache::new();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_regex_cache_default() {
        let cache = RegexCache::default();
        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_get_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles and stores the pattern.
        let regex1 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&1));

        // Second request is served from the cache.
        let regex2 = cache.get_regex(r"\d+").unwrap();
        assert_eq!(cache.cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"\d+"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_regex(r"[unterminated");
        assert!(result.is_err());
        assert!(cache.cache.is_empty());
    }

    #[test]
    fn test_get_fancy_regex_compilation() {
        let mut cache = RegexCache::new();

        // First request compiles and stores the pattern.
        let regex1 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&1));

        // Second request is served from the cache.
        let regex2 = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();
        assert_eq!(cache.fancy_cache.len(), 1);
        assert_eq!(cache.usage_stats.get(r"(?<=foo)bar"), Some(&2));

        // Both handles point at the same compiled regex.
        assert!(Arc::ptr_eq(&regex1, &regex2));
    }

    #[test]
    fn test_get_fancy_regex_invalid_pattern() {
        let mut cache = RegexCache::new();
        let result = cache.get_fancy_regex(r"(?<=invalid");
        assert!(result.is_err());
        assert!(cache.fancy_cache.is_empty());
    }

    #[test]
    fn test_get_stats() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_regex(r"\w+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        let stats = cache.get_stats();
        assert_eq!(stats.get(r"\d+"), Some(&2));
        assert_eq!(stats.get(r"\w+"), Some(&1));
        assert_eq!(stats.get(r"(?<=foo)bar"), Some(&1));
    }

    #[test]
    fn test_clear_cache() {
        let mut cache = RegexCache::new();

        let _ = cache.get_regex(r"\d+").unwrap();
        let _ = cache.get_fancy_regex(r"(?<=foo)bar").unwrap();

        assert!(!cache.cache.is_empty());
        assert!(!cache.fancy_cache.is_empty());
        assert!(!cache.usage_stats.is_empty());

        cache.clear();

        assert!(cache.cache.is_empty());
        assert!(cache.fancy_cache.is_empty());
        assert!(cache.usage_stats.is_empty());
    }

    #[test]
    fn test_global_cache_functions() {
        let regex1 = get_cached_regex(r"\d{3}").unwrap();
        let regex2 = get_cached_regex(r"\d{3}").unwrap();
        assert!(Arc::ptr_eq(&regex1, &regex2));

        let fancy1 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        let fancy2 = get_cached_fancy_regex(r"(?<=test)ing").unwrap();
        assert!(Arc::ptr_eq(&fancy1, &fancy2));

        let stats = get_cache_stats();
        assert!(stats.contains_key(r"\d{3}"));
        assert!(stats.contains_key(r"(?<=test)ing"));
    }

    #[test]
    fn test_regex_lazy_macro() {
        let re = regex_lazy!(r"^test.*end$");
        assert!(re.is_match("test something end"));
        assert!(!re.is_match("test something"));

        // Each expansion owns its own static, so a second pattern is independent.
        let re2 = regex_lazy!(r"^start.*finish$");
        assert!(re2.is_match("start and finish"));
        assert!(!re2.is_match("start without end"));
    }

    #[test]
    fn test_has_heading_markers() {
        assert!(has_heading_markers("# Heading"));
        assert!(has_heading_markers("Text with # symbol"));
        assert!(!has_heading_markers("Text without heading marker"));
    }

    #[test]
    fn test_has_list_markers() {
        assert!(has_list_markers("* Item"));
        assert!(has_list_markers("- Item"));
        assert!(has_list_markers("+ Item"));
        assert!(has_list_markers("1. Item"));
        assert!(!has_list_markers("Text without list markers"));
    }

    #[test]
    fn test_has_code_block_markers() {
        assert!(has_code_block_markers("```code```"));
        assert!(has_code_block_markers("~~~code~~~"));
        // Four-space indent, as used by Markdown indented code blocks.
        assert!(has_code_block_markers("Text\n    indented code"));
        assert!(!has_code_block_markers("Text without code blocks"));
    }

    #[test]
    fn test_has_emphasis_markers() {
        assert!(has_emphasis_markers("*emphasis*"));
        assert!(has_emphasis_markers("_emphasis_"));
        assert!(has_emphasis_markers("**bold**"));
        assert!(has_emphasis_markers("__bold__"));
        assert!(!has_emphasis_markers("no emphasis"));
    }

    #[test]
    fn test_has_html_tags() {
        assert!(has_html_tags("<div>content</div>"));
        assert!(has_html_tags("<br/>"));
        assert!(has_html_tags("<img src='test.jpg'>"));
        assert!(!has_html_tags("no html tags"));
        assert!(!has_html_tags("less than < but no tag"));
    }

    #[test]
    fn test_has_link_markers() {
        assert!(has_link_markers("[text](url)"));
        assert!(has_link_markers("[reference][1]"));
        assert!(has_link_markers("http://example.com"));
        assert!(has_link_markers("https://example.com"));
        assert!(has_link_markers("ftp://example.com"));
        assert!(!has_link_markers("no links here"));
    }

    #[test]
    fn test_has_image_markers() {
        // Positive cases must actually contain the "![" image opener.
        assert!(has_image_markers("![alt text](image.png)"));
        assert!(has_image_markers("![](image.png)"));
        assert!(!has_image_markers("[link](url)"));
        assert!(!has_image_markers("no images"));
    }

    #[test]
    fn test_contains_url() {
        assert!(contains_url("http://example.com"));
        assert!(contains_url("Text with https://example.com link"));
        assert!(contains_url("ftp://example.com"));
        assert!(!contains_url("Text without URL"));
        assert!(!contains_url("http not followed by ://"));

        // Edge cases
        assert!(!contains_url("http"));
        assert!(!contains_url("https"));
        assert!(!contains_url("://"));
        assert!(contains_url("Visit http://site.com now"));
        assert!(contains_url("See https://secure.site.com/path"));
    }

    #[test]
    fn test_contains_url_performance() {
        // Long text without any URL marker takes the early-exit path.
        let long_text = "a".repeat(10000);
        assert!(!contains_url(&long_text));

        // A URL at the very end must still be found.
        let text_with_url = format!("{long_text}https://example.com");
        assert!(contains_url(&text_with_url));
    }

    #[test]
    fn test_escape_regex() {
        assert_eq!(escape_regex("a.b"), "a\\.b");
        assert_eq!(escape_regex("a+b*c"), "a\\+b\\*c");
        assert_eq!(escape_regex("(test)"), "\\(test\\)");
        assert_eq!(escape_regex("[a-z]"), "\\[a-z\\]");
        assert_eq!(escape_regex("normal text"), "normal text");

        // Several special characters in a row
        assert_eq!(escape_regex(".$^{[(|)*+?\\"), "\\.\\$\\^\\{\\[\\(\\|\\)\\*\\+\\?\\\\");

        // Empty input
        assert_eq!(escape_regex(""), "");

        // Mixed content
        assert_eq!(escape_regex("test.com/path?query=1"), "test\\.com/path\\?query=1");
    }

    #[test]
    fn test_static_regex_patterns() {
        // URLs
        assert!(URL_REGEX.is_match("https://example.com"));
        assert!(URL_REGEX.is_match("http://test.org/path"));
        assert!(URL_REGEX.is_match("ftp://files.com"));
        assert!(!URL_REGEX.is_match("not a url"));

        // Headings
        assert!(ATX_HEADING_REGEX.is_match("# Heading"));
        assert!(ATX_HEADING_REGEX.is_match(" ## Indented"));
        assert!(ATX_HEADING_REGEX.is_match("### "));
        assert!(!ATX_HEADING_REGEX.is_match("Not a heading"));

        // Lists
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("* Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("- Item"));
        assert!(UNORDERED_LIST_MARKER_REGEX.is_match("+ Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("1. Item"));
        assert!(ORDERED_LIST_MARKER_REGEX.is_match("99. Item"));

        // Code blocks
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("```rust"));
        assert!(FENCED_CODE_BLOCK_START_REGEX.is_match("~~~"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("```"));
        assert!(FENCED_CODE_BLOCK_END_REGEX.is_match("~~~"));

        // Emphasis
        assert!(BOLD_ASTERISK_REGEX.is_match("**bold**"));
        assert!(BOLD_UNDERSCORE_REGEX.is_match("__bold__"));
        assert!(ITALIC_ASTERISK_REGEX.is_match("*italic*"));
        assert!(ITALIC_UNDERSCORE_REGEX.is_match("_italic_"));

        // HTML
        assert!(HTML_TAG_REGEX.is_match("<div>"));
        assert!(HTML_TAG_REGEX.is_match("<span class='test'>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<br/>"));
        assert!(HTML_SELF_CLOSING_TAG_REGEX.is_match("<img src='test'/>"));

        // Whitespace
        assert!(TRAILING_WHITESPACE_REGEX.is_match("line with spaces "));
        assert!(TRAILING_WHITESPACE_REGEX.is_match("tabs\t\t"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n"));
        assert!(MULTIPLE_BLANK_LINES_REGEX.is_match("\n\n\n\n"));

        // Blockquotes
        assert!(BLOCKQUOTE_PREFIX_RE.is_match("> Quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(" > Indented quote"));
        assert!(BLOCKQUOTE_PREFIX_RE.is_match(">> Nested"));
    }

    #[test]
    fn test_thread_safety() {
        use std::thread;

        let handles: Vec<_> = (0..10)
            .map(|i| {
                thread::spawn(move || {
                    let pattern = format!(r"\d{{{i}}}");
                    let regex = get_cached_regex(&pattern).unwrap();
                    assert!(regex.is_match(&"1".repeat(i)));
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }
    }
}