use lazy_static::lazy_static;
use regex::Regex;
lazy_static! {
    /// Default skip patterns, compiled the first time the static is accessed.
    ///
    /// The order of the entries is significant: the tests in this file look
    /// patterns up by their position in the list, so new patterns should be
    /// appended rather than inserted.
    ///
    /// All patterns are unanchored, i.e. they match anywhere inside the
    /// searched text.
    static ref DEFAULT_SKIP_PATTERNS: Vec<Regex> = {
        // Each entry is (regex source, panic message used if it fails to compile).
        const SOURCES: [(&str, &str); 9] = [
            // 0: `http://` or `https://` followed by one or more non-whitespace characters.
            (r"https?://[^\s]+", "Valid URL regex"),
            // 1: `#` followed by 3 to 8 hexadecimal digits (e.g. `#fff`, `#deadbeef`).
            (r"#[0-9a-fA-F]{3,8}", "Valid hex color regex"),
            // 2: `local@domain.tld`, where the final label is at least two ASCII letters.
            (
                r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
                "Valid email regex",
            ),
            // 3: a `/` followed by any run of non-whitespace characters
            //    (the run may be empty, so a lone `/` matches as well).
            (r"/[^\s]*", "Valid Unix path regex"),
            // 4: drive letter, `:\`, then any run of non-whitespace characters.
            (r"[A-Za-z]:\\[^\s]*", "Valid Windows path regex"),
            // 5: hexadecimal groups of 8-4-4-4-12 digits separated by hyphens.
            (
                r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}",
                "Valid UUID regex",
            ),
            // 6: at least 20 Base64-alphabet characters followed by one or two
            //    `=` padding characters; unpadded text is deliberately not matched.
            (r"[A-Za-z0-9+/]{20,}={1,2}", "Valid Base64 regex"),
            // 7: 7 to 40 hexadecimal digits delimited by word boundaries.
            (r"\b[0-9a-fA-F]{7,40}\b", "Valid git hash regex"),
            // 8: `[text](target)`; the link text is capture group 1.
            (r"\[([^\]]+)\]\([^\s)]+\)", "Valid markdown link regex"),
        ];

        SOURCES
            .iter()
            .map(|&(source, message)| Regex::new(source).expect(message))
            .collect()
    };
}
/// Returns the shared list of default skip patterns.
///
/// The list lives in the `DEFAULT_SKIP_PATTERNS` static, so every call hands
/// back a reference to the same `Vec`; nothing is recompiled per call.
///
/// # Panics
///
/// The first access compiles the patterns, and each compilation is unwrapped
/// with `expect`, so an invalid pattern would panic here.
pub fn get_default_skip_patterns() -> &'static Vec<Regex> {
    // Spell out the deref from the lazy_static wrapper type to the Vec it guards.
    std::ops::Deref::deref(&DEFAULT_SKIP_PATTERNS)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Positions of the patterns under test within the default list.
    const URL_INDEX: usize = 0;
    const HEX_COLOR_INDEX: usize = 1;
    const EMAIL_INDEX: usize = 2;
    const BASE64_INDEX: usize = 6;

    /// Asserts that the default pattern at `index` matches every string in
    /// `accepted` and none of the strings in `rejected`.
    fn check_pattern(index: usize, accepted: &[&str], rejected: &[&str]) {
        let pattern = &get_default_skip_patterns()[index];

        for text in accepted {
            assert!(pattern.is_match(text), "expected a match for {:?}", text);
        }
        for text in rejected {
            assert!(!pattern.is_match(text), "expected no match for {:?}", text);
        }
    }

    #[test]
    fn test_url_pattern() {
        check_pattern(
            URL_INDEX,
            &["https://www.example.com", "http://github.com/user/repo"],
            &["not a url"],
        );
    }

    #[test]
    fn test_hex_color_pattern() {
        check_pattern(
            HEX_COLOR_INDEX,
            &["#deadbeef", "#fff", "#123456"],
            // Missing the leading `#`, and non-hex digits, respectively.
            &["deadbeef", "#gg"],
        );
    }

    #[test]
    fn test_base64_pattern() {
        check_pattern(
            BASE64_INDEX,
            // Padded Base64 is matched.
            &[
                "dGVzdCBiYXNlNjQgZW5jb2Rpbmc=",
                "SGVsbG8gV29ybGQhIFRoaXMgaXM=",
            ],
            // Unpadded Base64 and slash-separated words must not be matched.
            &[
                "dGVzdCBiYXNlNjQgZW5jb2Rpbmc",
                "administraton/dashboard",
                "some/long/path/to/a/file",
            ],
        );
    }

    #[test]
    fn test_email_pattern() {
        check_pattern(
            EMAIL_INDEX,
            &["user@example.com", "test.email+tag@domain.co.uk"],
            &["not an email"],
        );
    }
}