use once_cell::sync::Lazy;
use regex::Regex;
use crate::profiler::schema_types::PatternType;
/// Matches a hyphenated UUID: hexadecimal groups of 8-4-4-4-12 digits.
///
/// Upper- and lowercase hex digits are both accepted. The pattern is anchored
/// at both ends, so the whole input must be the UUID.
static UUID_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str =
        r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$";
    Regex::new(PATTERN).expect("UUID regex compilation failed")
});
/// Matches a loosely validated email address: `local@domain.tld`.
///
/// Each of the three parts is one or more characters that are neither
/// whitespace nor `@`; the domain part must contain at least one `.`.
static EMAIL_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^[^\s@]+@[^\s@]+\.[^\s@]+$";
    Regex::new(PATTERN).expect("Email regex compilation failed")
});
/// Matches the `YYYY-MM-DDTHH:MM:SS` prefix of an ISO-8601 style timestamp.
///
/// The pattern is anchored at the start only, so anything may follow the
/// seconds field (for example `Z` or a `+05:00` offset). Field values are not
/// range-checked; only the digit counts and separators are.
static ISO_DATE_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}";
    Regex::new(PATTERN).expect("ISO date regex compilation failed")
});
/// Matches an `http://` or `https://` URL followed by one or more
/// non-whitespace characters. Other schemes are not matched.
static URL_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^https?://[^\s]+$";
    Regex::new(PATTERN).expect("URL regex compilation failed")
});
/// Matches a dotted-quad IPv4 shape: four groups of 1-3 digits separated by `.`.
///
/// Only the shape is checked; octets are not limited to 0-255, so a value such
/// as `256.1.1.1` matches.
static IPV4_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$";
    Regex::new(PATTERN).expect("IPv4 regex compilation failed")
});
/// Matches an IPv6-like shape: two to seven `:`-terminated groups of zero to
/// four hex digits, followed by a final group of zero to four hex digits.
///
/// Groups may be empty, which lets compressed forms such as `::1` and
/// `fe80::1` match. The pattern is permissive: any colon-separated run of
/// short hex groups (for example `10:30:00`) satisfies it as well.
static IPV6_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$";
    Regex::new(PATTERN).expect("IPv6 regex compilation failed")
});
/// Matches the three-segment shape of a JWT: three non-empty runs of
/// `A-Z`, `a-z`, `0-9`, `_` or `-`, separated by `.`.
///
/// Only the shape is checked; the segments are not decoded.
static JWT_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$";
    Regex::new(PATTERN).expect("JWT regex compilation failed")
});
/// Matches exactly 24 hexadecimal digits (upper- or lowercase).
static OBJECT_ID_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^[0-9a-fA-F]{24}$";
    Regex::new(PATTERN).expect("ObjectId regex compilation failed")
});
/// Matches a string made up solely of 16 or more hexadecimal digits
/// (upper- or lowercase).
static HEX_STRING_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^[0-9a-fA-F]{16,}$";
    Regex::new(PATTERN).expect("Hex string regex compilation failed")
});
/// Matches a loosely formatted phone number.
///
/// Shape: optional leading `+`; a 1-3 digit group and a 1-4 digit group, each
/// optionally wrapped in parentheses; then a 1-4 digit group and a 1-9 digit
/// group. Between consecutive groups a single `-`, `.` or whitespace character
/// is optional.
static PHONE_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str =
        r"^[\+]?[(]?[0-9]{1,3}[)]?[-\s\.]?[(]?[0-9]{1,4}[)]?[-\s\.]?[0-9]{1,4}[-\s\.]?[0-9]{1,9}$";
    Regex::new(PATTERN).expect("Phone regex compilation failed")
});
/// Matches a credit-card-like number of 13 to 19 digits.
///
/// Shape: three groups of four digits followed by a final group of 1-7 digits,
/// with an optional single `-` or whitespace character between groups. Only the
/// shape is checked; no checksum is computed.
static CREDIT_CARD_PATTERN: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r"^[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{1,7}$";
    Regex::new(PATTERN).expect("Credit card regex compilation failed")
});
/// Infers which well-known string format, if any, `value` conforms to.
///
/// Cheap pre-filters (byte length, leading character, presence of a marker
/// character) guard each regex so that most inputs skip most regexes. Checks
/// run in a fixed order and the first match wins:
///
/// 1. UUID (exactly 36 bytes) or ObjectId (exactly 24 bytes)
/// 2. JWT (longer than 50 bytes and containing `.`)
/// 3. Email (containing `@`)
/// 4. For values starting with an ASCII digit: ISO date (containing `T`),
///    IPv4 (containing `.` but no `:`), credit card (13-19 bytes)
/// 5. Phone (7-20 bytes, starting with an ASCII digit, `+` or `(`)
/// 6. URL (starting with `http`)
/// 7. IPv6 (containing `:`)
/// 8. Hex string (at least 16 bytes)
///
/// Returns `None` when `value` is shorter than 3 bytes or when no pattern
/// matches. All length thresholds are byte lengths (`str::len`), not
/// character counts.
#[inline]
pub fn detect_pattern(value: &str) -> Option<PatternType> {
    let len = value.len();
    if len < 3 {
        return None;
    }

    // Fixed-width identifiers: the length alone rules out the regex for
    // almost every input.
    match len {
        36 if UUID_PATTERN.is_match(value) => return Some(PatternType::Uuid),
        24 if OBJECT_ID_PATTERN.is_match(value) => return Some(PatternType::ObjectId),
        _ => {}
    }

    if len > 50 && value.contains('.') && JWT_PATTERN.is_match(value) {
        return Some(PatternType::Jwt);
    }

    if value.contains('@') && EMAIL_PATTERN.is_match(value) {
        return Some(PatternType::Email);
    }

    // Inspect the leading character once; several checks below depend on it.
    let first = value.chars().next();
    let digit_led = matches!(first, Some('0'..='9'));

    if digit_led {
        if value.contains('T') && ISO_DATE_PATTERN.is_match(value) {
            return Some(PatternType::IsoDate);
        }
        if value.contains('.') && !value.contains(':') && IPV4_PATTERN.is_match(value) {
            return Some(PatternType::Ipv4);
        }
        if (13..=19).contains(&len) && CREDIT_CARD_PATTERN.is_match(value) {
            return Some(PatternType::CreditCard);
        }
    }

    // Phone comes after IPv4 and credit card: the phone regex is loose enough
    // to accept many of those digit-led values as well.
    let phone_lead = digit_led || matches!(first, Some('+') | Some('('));
    if (7..=20).contains(&len) && phone_lead && PHONE_PATTERN.is_match(value) {
        return Some(PatternType::Phone);
    }

    if value.starts_with("http") && URL_PATTERN.is_match(value) {
        Some(PatternType::Url)
    } else if value.contains(':') && IPV6_PATTERN.is_match(value) {
        Some(PatternType::Ipv6)
    } else if len >= 16 && HEX_STRING_PATTERN.is_match(value) {
        Some(PatternType::HexString)
    } else {
        None
    }
}
/// Reports whether `value` satisfies the regex associated with `pattern`.
///
/// Unlike [`detect_pattern`], this applies only the regex for the requested
/// pattern: no length or leading-character pre-filters are used and no
/// priority ordering between patterns is involved.
#[inline]
pub fn matches_pattern(value: &str, pattern: PatternType) -> bool {
    // Pick the regex first so the match itself is a single call site.
    let regex = match pattern {
        PatternType::Uuid => &*UUID_PATTERN,
        PatternType::Email => &*EMAIL_PATTERN,
        PatternType::IsoDate => &*ISO_DATE_PATTERN,
        PatternType::Url => &*URL_PATTERN,
        PatternType::Ipv4 => &*IPV4_PATTERN,
        PatternType::Ipv6 => &*IPV6_PATTERN,
        PatternType::Jwt => &*JWT_PATTERN,
        PatternType::ObjectId => &*OBJECT_ID_PATTERN,
        PatternType::HexString => &*HEX_STRING_PATTERN,
        PatternType::Phone => &*PHONE_PATTERN,
        PatternType::CreditCard => &*CREDIT_CARD_PATTERN,
    };
    regex.is_match(value)
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_uuid_detection() {
        // Lowercase and uppercase hex digits are both accepted.
        for input in [
            "550e8400-e29b-41d4-a716-446655440000",
            "550E8400-E29B-41D4-A716-446655440000",
        ] {
            assert_eq!(detect_pattern(input), Some(PatternType::Uuid));
        }

        assert_eq!(detect_pattern("not-a-uuid"), None);

        // Without hyphens the value is a plain 32-digit hex string.
        assert_eq!(
            detect_pattern("550e8400e29b41d4a716446655440000"),
            Some(PatternType::HexString)
        );
    }

    #[test]
    fn test_email_detection() {
        for input in ["user@example.com", "name.last@sub.domain.org"] {
            assert_eq!(detect_pattern(input), Some(PatternType::Email));
        }
        for input in ["invalid-email", "@nodomain"] {
            assert_eq!(detect_pattern(input), None);
        }
    }

    #[test]
    fn test_iso_date_detection() {
        // Bare timestamp, UTC suffix, and numeric offset are all accepted.
        for input in [
            "2024-01-15T10:30:00",
            "2024-01-15T10:30:00Z",
            "2024-01-15T10:30:00+05:00",
        ] {
            assert_eq!(detect_pattern(input), Some(PatternType::IsoDate));
        }

        // A date without a time component does not match.
        assert!(!matches_pattern("2024-01-15", PatternType::IsoDate));
    }

    #[test]
    fn test_url_detection() {
        for input in ["http://example.com", "https://api.example.com/path?query=1"] {
            assert_eq!(detect_pattern(input), Some(PatternType::Url));
        }

        // Only http(s) schemes are recognised.
        assert_eq!(detect_pattern("ftp://example.com"), None);
        // A bare host name without a scheme is not a URL.
        assert_eq!(detect_pattern("example.com"), None);
    }

    #[test]
    fn test_ipv4_detection() {
        // "256.1.1.1" is included on purpose: the regex checks the shape only
        // and does not range-check octets.
        for input in ["192.168.1.1", "10.0.0.255", "256.1.1.1"] {
            assert_eq!(detect_pattern(input), Some(PatternType::Ipv4));
        }

        // Three groups are not enough.
        assert!(!matches_pattern("192.168.1", PatternType::Ipv4));
    }

    #[test]
    fn test_ipv6_detection() {
        // Full form plus two compressed forms.
        for input in ["2001:0db8:85a3:0000:0000:8a2e:0370:7334", "::1", "fe80::1"] {
            assert_eq!(detect_pattern(input), Some(PatternType::Ipv6));
        }
    }

    #[test]
    fn test_jwt_detection() {
        let jwt = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U";
        assert_eq!(detect_pattern(jwt), Some(PatternType::Jwt));

        // Three dot-separated segments, but too short to be treated as a JWT.
        assert_eq!(detect_pattern("not.a.jwt"), None);
    }

    #[test]
    fn test_object_id_detection() {
        for input in ["507f1f77bcf86cd799439011", "507F1F77BCF86CD799439011"] {
            assert_eq!(detect_pattern(input), Some(PatternType::ObjectId));
        }

        // 23 hex digits: one short of an ObjectId, so it is a generic hex string.
        assert_eq!(
            detect_pattern("507f1f77bcf86cd79943901"),
            Some(PatternType::HexString)
        );
    }

    #[test]
    fn test_hex_string_detection() {
        for input in ["abcdef1234567890", "0123456789abcdef0123456789abcdef"] {
            assert_eq!(detect_pattern(input), Some(PatternType::HexString));
        }

        // Fewer than 16 digits.
        assert_eq!(detect_pattern("abcdef12345678"), None);
        // Contains characters outside the hex alphabet.
        assert_eq!(detect_pattern("ghijkl1234567890"), None);
    }

    #[test]
    fn test_phone_detection() {
        assert_eq!(detect_pattern("+1-555-1234567"), Some(PatternType::Phone));

        for input in ["+1-555-123-4567", "(555) 123-4567", "555.123.4567"] {
            assert!(matches_pattern(input, PatternType::Phone));
        }
    }

    #[test]
    fn test_matches_pattern() {
        assert!(matches_pattern(
            "550e8400-e29b-41d4-a716-446655440000",
            PatternType::Uuid
        ));
        assert!(!matches_pattern("not-a-uuid", PatternType::Uuid));

        assert!(matches_pattern("user@example.com", PatternType::Email));
        assert!(!matches_pattern("invalid", PatternType::Email));
    }

    #[test]
    fn test_empty_and_short_strings() {
        for input in ["", "ab", "abc"] {
            assert_eq!(detect_pattern(input), None);
        }
    }

    #[test]
    fn test_pattern_priority() {
        // Both values are checked before the generic hex-string fallback.
        let uuid = "550e8400-e29b-41d4-a716-446655440000";
        assert_eq!(detect_pattern(uuid), Some(PatternType::Uuid));

        let object_id = "507f1f77bcf86cd799439011";
        assert_eq!(detect_pattern(object_id), Some(PatternType::ObjectId));
    }

    #[test]
    fn test_credit_card_detection() {
        // Hyphenated, unseparated, and space-separated 16-digit forms, followed
        // by the shortest (13 digits) and longest (19 digits) accepted lengths.
        for input in [
            "4111-1111-1111-1111",
            "4111111111111111",
            "4111 1111 1111 1111",
            "1234567890123",
            "1234567890123456789",
        ] {
            assert!(matches_pattern(input, PatternType::CreditCard));
        }

        for input in ["4111-2222-3333-4444", "4111222233334444"] {
            assert_eq!(detect_pattern(input), Some(PatternType::CreditCard));
        }

        // Too short.
        assert_ne!(detect_pattern("12345"), Some(PatternType::CreditCard));
        // Too long: five groups of four digits.
        assert_ne!(
            detect_pattern("1234-5678-9012-3456-7890"),
            Some(PatternType::CreditCard)
        );
    }
}