synapse_pingora/profiler/
patterns.rs1use once_cell::sync::Lazy;
11use regex::Regex;
12
13use crate::profiler::schema_types::PatternType;
14
15static UUID_PATTERN: Lazy<Regex> = Lazy::new(|| {
22 Regex::new(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
23 .expect("UUID regex compilation failed")
24});
25
26static EMAIL_PATTERN: Lazy<Regex> = Lazy::new(|| {
29 Regex::new(r"^[^\s@]+@[^\s@]+\.[^\s@]+$").expect("Email regex compilation failed")
30});
31
32static ISO_DATE_PATTERN: Lazy<Regex> = Lazy::new(|| {
35 Regex::new(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}").expect("ISO date regex compilation failed")
36});
37
38static URL_PATTERN: Lazy<Regex> =
41 Lazy::new(|| Regex::new(r"^https?://[^\s]+$").expect("URL regex compilation failed"));
42
43static IPV4_PATTERN: Lazy<Regex> = Lazy::new(|| {
46 Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").expect("IPv4 regex compilation failed")
47});
48
49static IPV6_PATTERN: Lazy<Regex> = Lazy::new(|| {
52 Regex::new(r"^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$")
53 .expect("IPv6 regex compilation failed")
54});
55
56static JWT_PATTERN: Lazy<Regex> = Lazy::new(|| {
59 Regex::new(r"^[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$")
60 .expect("JWT regex compilation failed")
61});
62
63static OBJECT_ID_PATTERN: Lazy<Regex> =
66 Lazy::new(|| Regex::new(r"^[0-9a-fA-F]{24}$").expect("ObjectId regex compilation failed"));
67
68static HEX_STRING_PATTERN: Lazy<Regex> =
71 Lazy::new(|| Regex::new(r"^[0-9a-fA-F]{16,}$").expect("Hex string regex compilation failed"));
72
73static PHONE_PATTERN: Lazy<Regex> = Lazy::new(|| {
76 Regex::new(
77 r"^[\+]?[(]?[0-9]{1,3}[)]?[-\s\.]?[(]?[0-9]{1,4}[)]?[-\s\.]?[0-9]{1,4}[-\s\.]?[0-9]{1,9}$",
78 )
79 .expect("Phone regex compilation failed")
80});
81
82static CREDIT_CARD_PATTERN: Lazy<Regex> = Lazy::new(|| {
85 Regex::new(r"^[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{4}[-\s]?[0-9]{1,7}$")
86 .expect("Credit card regex compilation failed")
87});
88
89#[inline]
114pub fn detect_pattern(value: &str) -> Option<PatternType> {
115 if value.len() < 3 {
117 return None;
118 }
119
120 let len = value.len();
122
123 if len == 36 && UUID_PATTERN.is_match(value) {
125 return Some(PatternType::Uuid);
126 }
127
128 if len == 24 && OBJECT_ID_PATTERN.is_match(value) {
130 return Some(PatternType::ObjectId);
131 }
132
133 if len > 50 && value.contains('.') && JWT_PATTERN.is_match(value) {
135 return Some(PatternType::Jwt);
136 }
137
138 if value.contains('@') && EMAIL_PATTERN.is_match(value) {
140 return Some(PatternType::Email);
141 }
142
143 if value.starts_with(|c: char| c.is_ascii_digit()) {
145 if value.contains('T') && ISO_DATE_PATTERN.is_match(value) {
146 return Some(PatternType::IsoDate);
147 }
148
149 if value.contains('.') && !value.contains(':') && IPV4_PATTERN.is_match(value) {
151 return Some(PatternType::Ipv4);
152 }
153
154 if (13..=19).contains(&len) && CREDIT_CARD_PATTERN.is_match(value) {
156 return Some(PatternType::CreditCard);
157 }
158 }
159
160 if (7..=20).contains(&len) {
162 let first_char = value.chars().next();
163 if (matches!(first_char, Some('+') | Some('('))
164 || value.starts_with(|c: char| c.is_ascii_digit()))
165 && PHONE_PATTERN.is_match(value)
166 {
167 return Some(PatternType::Phone);
168 }
169 }
170
171 if value.starts_with("http") && URL_PATTERN.is_match(value) {
173 return Some(PatternType::Url);
174 }
175
176 if value.contains(':') && IPV6_PATTERN.is_match(value) {
178 return Some(PatternType::Ipv6);
179 }
180
181 if len >= 16 && HEX_STRING_PATTERN.is_match(value) {
183 return Some(PatternType::HexString);
184 }
185
186 None
187}
188
189#[inline]
191pub fn matches_pattern(value: &str, pattern: PatternType) -> bool {
192 match pattern {
193 PatternType::Uuid => UUID_PATTERN.is_match(value),
194 PatternType::Email => EMAIL_PATTERN.is_match(value),
195 PatternType::IsoDate => ISO_DATE_PATTERN.is_match(value),
196 PatternType::Url => URL_PATTERN.is_match(value),
197 PatternType::Ipv4 => IPV4_PATTERN.is_match(value),
198 PatternType::Ipv6 => IPV6_PATTERN.is_match(value),
199 PatternType::Jwt => JWT_PATTERN.is_match(value),
200 PatternType::ObjectId => OBJECT_ID_PATTERN.is_match(value),
201 PatternType::HexString => HEX_STRING_PATTERN.is_match(value),
202 PatternType::Phone => PHONE_PATTERN.is_match(value),
203 PatternType::CreditCard => CREDIT_CARD_PATTERN.is_match(value),
204 }
205}
206
#[cfg(test)]
mod tests {
    use super::*;

    const UUID_SAMPLE: &str = "550e8400-e29b-41d4-a716-446655440000";
    const OBJECT_ID_SAMPLE: &str = "507f1f77bcf86cd799439011";

    /// Asserts that `detect_pattern` returns `expected` for every input in `inputs`.
    fn assert_detected(inputs: &[&str], expected: Option<PatternType>) {
        for input in inputs {
            assert_eq!(detect_pattern(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_uuid_detection() {
        assert_detected(
            &[UUID_SAMPLE, "550E8400-E29B-41D4-A716-446655440000"],
            Some(PatternType::Uuid),
        );
        assert_detected(&["not-a-uuid"], None);
        // Without hyphens the value is no longer a UUID but still a 32-digit hex string.
        assert_detected(
            &["550e8400e29b41d4a716446655440000"],
            Some(PatternType::HexString),
        );
    }

    #[test]
    fn test_email_detection() {
        assert_detected(
            &["user@example.com", "name.last@sub.domain.org"],
            Some(PatternType::Email),
        );
        assert_detected(&["invalid-email", "@nodomain"], None);
    }

    #[test]
    fn test_iso_date_detection() {
        assert_detected(
            &[
                "2024-01-15T10:30:00",
                "2024-01-15T10:30:00Z",
                "2024-01-15T10:30:00+05:00",
            ],
            Some(PatternType::IsoDate),
        );
        // A date without a time component is not an ISO date-time.
        assert!(!matches_pattern("2024-01-15", PatternType::IsoDate));
    }

    #[test]
    fn test_url_detection() {
        assert_detected(
            &["http://example.com", "https://api.example.com/path?query=1"],
            Some(PatternType::Url),
        );
        // Only http/https schemes count, and a bare host name has no scheme at all.
        assert_detected(&["ftp://example.com", "example.com"], None);
    }

    #[test]
    fn test_ipv4_detection() {
        // "256.1.1.1" is accepted because the regex does not range-check each group.
        assert_detected(
            &["192.168.1.1", "10.0.0.255", "256.1.1.1"],
            Some(PatternType::Ipv4),
        );
        assert!(!matches_pattern("192.168.1", PatternType::Ipv4));
    }

    #[test]
    fn test_ipv6_detection() {
        assert_detected(
            &["2001:0db8:85a3:0000:0000:8a2e:0370:7334", "::1", "fe80::1"],
            Some(PatternType::Ipv6),
        );
    }

    #[test]
    fn test_jwt_detection() {
        let jwt = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U";
        assert_detected(&[jwt], Some(PatternType::Jwt));
        // Three dot-separated segments, but below the length guard used for JWT detection.
        assert_detected(&["not.a.jwt"], None);
    }

    #[test]
    fn test_object_id_detection() {
        assert_detected(
            &[OBJECT_ID_SAMPLE, "507F1F77BCF86CD799439011"],
            Some(PatternType::ObjectId),
        );
        // 23 hex digits: one short of an ObjectId, so it falls back to a generic hex string.
        assert_detected(
            &["507f1f77bcf86cd79943901"],
            Some(PatternType::HexString),
        );
    }

    #[test]
    fn test_hex_string_detection() {
        assert_detected(
            &["abcdef1234567890", "0123456789abcdef0123456789abcdef"],
            Some(PatternType::HexString),
        );
        // The first is shorter than 16 characters; the second contains non-hex letters.
        assert_detected(&["abcdef12345678", "ghijkl1234567890"], None);
    }

    #[test]
    fn test_phone_detection() {
        assert_detected(&["+1-555-1234567"], Some(PatternType::Phone));

        for input in ["+1-555-123-4567", "(555) 123-4567", "555.123.4567"] {
            assert!(
                matches_pattern(input, PatternType::Phone),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_matches_pattern() {
        assert!(matches_pattern(UUID_SAMPLE, PatternType::Uuid));
        assert!(!matches_pattern("not-a-uuid", PatternType::Uuid));

        assert!(matches_pattern("user@example.com", PatternType::Email));
        assert!(!matches_pattern("invalid", PatternType::Email));
    }

    #[test]
    fn test_empty_and_short_strings() {
        assert_detected(&["", "ab", "abc"], None);
    }

    #[test]
    fn test_pattern_priority() {
        // Both samples would also fit looser hex-based shapes; the specific type must win.
        assert_detected(&[UUID_SAMPLE], Some(PatternType::Uuid));
        assert_detected(&[OBJECT_ID_SAMPLE], Some(PatternType::ObjectId));
    }

    #[test]
    fn test_credit_card_detection() {
        // Separator variants, plus the shortest (13) and longest (19) plain digit runs.
        for input in [
            "4111-1111-1111-1111",
            "4111111111111111",
            "4111 1111 1111 1111",
            "1234567890123",
            "1234567890123456789",
        ] {
            assert!(
                matches_pattern(input, PatternType::CreditCard),
                "input: {:?}",
                input
            );
        }

        assert_detected(
            &["4111-2222-3333-4444", "4111222233334444"],
            Some(PatternType::CreditCard),
        );

        // Too short, and too long (five groups), respectively.
        for input in ["12345", "1234-5678-9012-3456-7890"] {
            assert_ne!(
                detect_pattern(input),
                Some(PatternType::CreditCard),
                "input: {:?}",
                input
            );
        }
    }
}