1use cedarwood::Cedar;
82use regex::Regex;
83
84#[derive(Debug, thiserror::Error)]
87pub enum Error {
88 #[error(transparent)]
90 RegexError(#[from] regex::Error),
91}
92
/// Regular expression source describing an accepted domain pattern.
///
/// The pattern is anchored at the start only. It accepts one or more
/// dot-terminated labels (ASCII letters, digits and `-`, starting with a letter
/// or digit) followed by either an alphabetic last label of at least two letters
/// or a literal `*`.
pub const REGEXP_DOMAIN_OR_PREFIX: &str = r"^([a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9]*\.)+([a-zA-Z]{2,}|\*)";
96
/// Returns `text` with its characters (Unicode scalar values) in reverse order.
fn reverse_string(text: &str) -> String {
    // Reversing keeps every char's UTF-8 encoding, so the byte length is unchanged.
    let mut reversed = String::with_capacity(text.len());
    reversed.extend(text.chars().rev());
    reversed
}
101
102#[derive(Debug, Clone)]
104pub struct DomainMatchingRule {
107 prefix_cedar: Cedar,
109 suffix_cedar: Cedar,
111 prefix_dict: Vec<String>,
113 suffix_dict: Vec<String>,
115}
116
117impl TryFrom<Vec<&str>> for DomainMatchingRule {
119 type Error = Error;
120
121 fn try_from(domain_list: Vec<&str>) -> Result<Self, Self::Error> {
123 DomainMatchingRule::try_from(domain_list.as_slice())
124 }
125}
126
127impl TryFrom<Vec<String>> for DomainMatchingRule {
128 type Error = Error;
129
130 fn try_from(domain_list: Vec<String>) -> Result<Self, Self::Error> {
132 let domain_list: Vec<&str> = domain_list.iter().map(AsRef::as_ref).collect();
133 DomainMatchingRule::try_from(domain_list)
134 }
135}
136
137impl TryFrom<&[String]> for DomainMatchingRule {
138 type Error = Error;
139
140 fn try_from(domain_list: &[String]) -> Result<Self, Self::Error> {
142 let domain_list: Vec<&str> = domain_list.iter().map(AsRef::as_ref).collect();
143 DomainMatchingRule::try_from(domain_list)
144 }
145}
146
147impl TryFrom<&[&str]> for DomainMatchingRule {
148 type Error = Error;
149
150 fn try_from(domain_list: &[&str]) -> Result<Self, Self::Error> {
152 let start_with_star = Regex::new(r"^\*\..+")?;
153 let end_with_star = Regex::new(r".+\.\*$")?;
154 let re = Regex::new(&format!("{}{}{}", r"^", REGEXP_DOMAIN_OR_PREFIX, r"$"))?;
156 let dict: Vec<String> = domain_list
157 .iter()
158 .map(|d| if start_with_star.is_match(d) { &d[2..] } else { d })
159 .filter(|x| re.is_match(x) || (x.split('.').count() == 1))
160 .map(|y| y.to_ascii_lowercase())
161 .collect();
162 let prefix_dict: Vec<String> = dict
163 .iter()
164 .filter(|d| end_with_star.is_match(d))
165 .map(|d| d[..d.len() - 2].to_string())
166 .collect();
167 let suffix_dict: Vec<String> = dict
168 .iter()
169 .filter(|d| !end_with_star.is_match(d))
170 .map(|d| reverse_string(d))
171 .collect();
172
173 let prefix_kv: Vec<(&str, i32)> = prefix_dict
174 .iter()
175 .map(AsRef::as_ref)
176 .enumerate()
177 .map(|(k, s)| (s, k as i32))
178 .collect();
179 let mut prefix_cedar = Cedar::new();
180 prefix_cedar.build(&prefix_kv);
181
182 let suffix_kv: Vec<(&str, i32)> = suffix_dict
183 .iter()
184 .map(AsRef::as_ref)
185 .enumerate()
186 .map(|(k, s)| (s, k as i32))
187 .collect();
188 let mut suffix_cedar = Cedar::new();
189 suffix_cedar.build(&suffix_kv);
190
191 Ok(DomainMatchingRule {
192 prefix_cedar,
193 suffix_cedar,
194 prefix_dict,
195 suffix_dict,
196 })
197 }
198}
199
200#[inline]
202fn find_match<'a>(name: &'a str, ceder: &'a Cedar, dict: &'a [String]) -> impl Iterator<Item = (String, usize)> + 'a {
204 ceder
205 .common_prefix_iter(name)
206 .map(|(x, matched_prefix_len)| (dict[x as usize].clone(), matched_prefix_len))
207}
208
209#[inline]
210fn find_match_all_inner(name: &str, cedar: &Cedar, dict: &[String]) -> Vec<String> {
212 let matched_items = find_match(name, &cedar, &dict);
213 matched_items
214 .filter_map(|(found, _)| {
215 if is_matched_as_domain(&found, name) {
216 Some(found)
217 } else {
218 None
219 }
220 })
221 .collect()
222}
223
224#[inline]
225fn find_match_inner(name: &str, cedar: &Cedar, dict: &[String]) -> bool {
227 let mut matched_items = find_match(name, &cedar, &dict);
228 matched_items.any(|(found, _)| is_matched_as_domain(&found, name))
229}
230
231#[inline]
232fn find_match_longest_inner(name: &str, cedar: &Cedar, dict: &[String]) -> Option<String> {
234 let matched_items = find_match(name, &cedar, &dict);
235 let longest_matched_as_domain = matched_items
236 .filter(|(found, _)| is_matched_as_domain(found, name))
237 .max_by_key(|(_, len)| *len);
238 longest_matched_as_domain.map(|(found, _)| found)
239}
240
/// Decides whether `matched` covers `domain_name` up to a label boundary.
///
/// Returns `true` when both strings have the same byte length, or when the
/// character of `domain_name` located right after the first
/// `matched.chars().count()` characters is a `.`. Returns `false` otherwise,
/// including when `domain_name` is shorter than `matched`.
///
/// The function does not check that `matched` actually is a prefix of
/// `domain_name`; the callers in this file only pass candidates produced by the
/// trie's common-prefix lookup.
#[inline]
fn is_matched_as_domain(matched: &str, domain_name: &str) -> bool {
    if matched.len() == domain_name.len() {
        return true;
    }
    // Compare the char directly; no temporary `String` is needed.
    domain_name.chars().nth(matched.chars().count()) == Some('.')
}
252
253impl DomainMatchingRule {
255 pub fn find_suffix_match(&self, domain_name: &str) -> bool {
259 let reverse_domain_name = reverse_string(domain_name);
260 find_match_inner(&reverse_domain_name, &self.suffix_cedar, &self.suffix_dict)
261 }
262
263 pub fn find_suffix_match_all(&self, domain_name: &str) -> Vec<String> {
266 let reverse_domain_name = reverse_string(domain_name);
267 find_match_all_inner(&reverse_domain_name, &self.suffix_cedar, &self.suffix_dict)
268 }
269
270 pub fn find_suffix_match_longest(&self, domain_name: &str) -> Option<String> {
273 let reverse_domain_name = reverse_string(domain_name);
274 find_match_longest_inner(&reverse_domain_name, &self.suffix_cedar, &self.suffix_dict)
275 }
276
277 pub fn find_prefix_match(&self, domain_name: &str) -> bool {
281 find_match_inner(domain_name, &self.prefix_cedar, &self.prefix_dict)
282 }
283
284 pub fn find_prefix_match_all(&self, domain_name: &str) -> Vec<String> {
287 find_match_all_inner(domain_name, &self.prefix_cedar, &self.prefix_dict)
288 }
289
290 pub fn find_prefix_match_longest(&self, domain_name: &str) -> Option<String> {
293 find_match_longest_inner(domain_name, &self.prefix_cedar, &self.prefix_dict)
294 }
295
296 pub fn is_matched(&self, domain_name: &str) -> bool {
301 if self.find_suffix_match(domain_name) {
302 return true;
303 }
304
305 if self.find_prefix_match(domain_name) {
306 return true;
307 }
308
309 false
312 }
313}
314
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a rule through the `Vec<String>` conversion, panicking on failure.
    fn rule_from(domains: &[&str]) -> DomainMatchingRule {
        let owned: Vec<String> = domains.iter().map(|d| d.to_string()).collect();
        DomainMatchingRule::try_from(owned).unwrap()
    }

    /// Returns `true` when `matches` contains `expected`.
    fn has(matches: &[String], expected: &str) -> bool {
        matches.iter().any(|m| m == expected)
    }

    #[test]
    fn matching_works() {
        let rule = rule_from(&["www.google.com", "*.google.com", "yahoo.co.*"]);

        assert!(rule.is_matched("wwxx.google.com"));
        assert!(rule.is_matched("yahoo.co.jp"));

        assert!(!rule.is_matched("www.yahoo.com"));
        assert!(!rule.is_matched("www.yahoo.co.jp"));
    }

    #[test]
    fn matching_works_regardless_of_dns0x20() {
        let rule = rule_from(&["GOOGLE.com"]);

        assert!(rule.is_matched("www.google.com"));

        let lowered = "WWW.gOoGlE.COM".to_ascii_lowercase();
        assert!(rule.is_matched(lowered.as_str()));
    }

    #[test]
    fn find_suffix_match_all_works() {
        let rule = rule_from(&["google.com", "*.google.com", "com", "example.com"]);

        let matches = rule.find_suffix_match_all("test.google.com");
        assert!(has(&matches, "moc.elgoog"));
        assert!(has(&matches, "moc"));
        assert_eq!(matches.len(), 2);

        let matches = rule.find_suffix_match_all("example.com");
        assert!(has(&matches, "moc.elpmaxe"));
        assert!(has(&matches, "moc"));
        assert_eq!(matches.len(), 2);

        let matches = rule.find_suffix_match_all("yahoo.org");
        assert!(matches.is_empty());

        let matches = rule.find_suffix_match_all("google.com");
        assert!(has(&matches, "moc.elgoog"));
        assert!(has(&matches, "moc"));
        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn find_suffix_match_longest_works() {
        let rule = rule_from(&["google.com", "*.google.com", "com", "example.com"]);

        let longest = rule.find_suffix_match_longest("test.google.com");
        assert_eq!(longest.as_deref(), Some("moc.elgoog"));

        let longest = rule.find_suffix_match_longest("example.com");
        assert_eq!(longest.as_deref(), Some("moc.elpmaxe"));

        let longest = rule.find_suffix_match_longest("yahoo.org");
        assert_eq!(longest, None);

        let single = rule_from(&["test.org"]);
        let longest = single.find_suffix_match_longest("test.org");
        assert_eq!(longest.as_deref(), Some("gro.tset"));
    }

    #[test]
    fn find_suffix_match_works() {
        let rule = rule_from(&["google.com", "*.google.com", "example.org"]);

        for name in ["www.google.com", "google.com", "example.org"] {
            assert!(rule.find_suffix_match(name));
        }

        for name in ["yahoo.com", "google.org"] {
            assert!(!rule.find_suffix_match(name));
        }
    }

    #[test]
    fn find_prefix_match_works() {
        let rule = rule_from(&["www.example.*", "blog.test.*"]);

        for name in ["www.example.com", "www.example.org", "blog.test.net"] {
            assert!(rule.find_prefix_match(name));
        }

        for name in ["api.example.com", "www.google.com", "example.com"] {
            assert!(!rule.find_prefix_match(name));
        }
    }

    #[test]
    fn find_prefix_match_all_works() {
        let rule = rule_from(&["www.example.*", "www.*", "blog.test.*", "example.*"]);

        let matches = rule.find_prefix_match_all("www.example.com");
        assert!(has(&matches, "www.example"));
        assert!(has(&matches, "www"));
        assert_eq!(matches.len(), 2);

        let matches = rule.find_prefix_match_all("blog.test.net");
        assert!(has(&matches, "blog.test"));
        assert_eq!(matches.len(), 1);

        let matches = rule.find_prefix_match_all("api.google.com");
        assert!(matches.is_empty());

        let matches = rule.find_prefix_match_all("example.org");
        assert!(has(&matches, "example"));
        assert_eq!(matches.len(), 1);

        let matches = rule.find_prefix_match_all("www.test.co.uk");
        assert!(has(&matches, "www"));
        assert_eq!(matches.len(), 1);
    }

    #[test]
    fn find_prefix_match_longest_works() {
        let rule = rule_from(&["www.example.*", "www.*", "blog.test.*", "example.*"]);

        let longest = rule.find_prefix_match_longest("www.example.com");
        assert_eq!(longest.as_deref(), Some("www.example"));

        let longest = rule.find_prefix_match_longest("blog.test.net");
        assert_eq!(longest.as_deref(), Some("blog.test"));

        let longest = rule.find_prefix_match_longest("api.google.com");
        assert_eq!(longest, None);

        let longest = rule.find_prefix_match_longest("example.org");
        assert_eq!(longest.as_deref(), Some("example"));

        let longest = rule.find_prefix_match_longest("www.test.co.uk");
        assert_eq!(longest.as_deref(), Some("www"));
    }

    #[test]
    fn edge_cases_work() {
        let rule = rule_from(&["a.com", "*.b.com", "c.*"]);

        assert!(rule.find_suffix_match("a.com"));
        assert!(rule.find_suffix_match("x.b.com"));
        assert!(rule.find_prefix_match("c.org"));

        let short_domain_rule = rule_from(&["co"]);
        assert!(short_domain_rule.find_suffix_match("co"));

        let empty_matches = rule.find_suffix_match_all("nonexistent.domain");
        assert!(empty_matches.is_empty());

        let no_longest = rule.find_suffix_match_longest("nonexistent.domain");
        assert_eq!(no_longest, None);
    }

    #[test]
    fn mixed_prefix_suffix_patterns_work() {
        let rule = rule_from(&["*.google.com", "www.example.*", "exact.domain.net"]);

        assert!(rule.is_matched("api.google.com"));
        assert!(rule.is_matched("mail.google.com"));
        assert!(rule.is_matched("exact.domain.net"));

        assert!(rule.is_matched("www.example.com"));
        assert!(rule.is_matched("www.example.org"));

        assert!(rule.is_matched("google.com"));
        assert!(!rule.is_matched("example.com"));
        assert!(!rule.is_matched("api.example.com"));
    }

    #[test]
    fn debug_pattern_behavior() {
        let suffix_rule = rule_from(&["*.google.com"]);

        assert!(suffix_rule.is_matched("api.google.com"));
        assert!(suffix_rule.is_matched("google.com"));

        let prefix_rule = rule_from(&["www.example.*"]);

        assert!(prefix_rule.is_matched("www.example.com"));
        assert!(!prefix_rule.is_matched("example.com"));
        assert!(!prefix_rule.is_matched("api.example.com"));
    }

    #[test]
    fn test_try_from_implementations() {
        // Vec<&str>
        let domains_vec_str = vec!["google.com", "*.example.com"];
        let rule1 = DomainMatchingRule::try_from(domains_vec_str).unwrap();
        assert!(rule1.is_matched("google.com"));
        assert!(rule1.is_matched("test.example.com"));

        // Vec<String>
        let domains_vec_string = vec![String::from("google.com"), String::from("*.example.com")];
        let rule2 = DomainMatchingRule::try_from(domains_vec_string).unwrap();
        assert!(rule2.is_matched("google.com"));
        assert!(rule2.is_matched("test.example.com"));

        // &[String]
        let domains_slice_string = vec![String::from("google.com"), String::from("*.example.com")];
        let rule3 = DomainMatchingRule::try_from(domains_slice_string.as_slice()).unwrap();
        assert!(rule3.is_matched("google.com"));
        assert!(rule3.is_matched("test.example.com"));

        // &[&str]
        let domains_slice_str: &[&str] = &["google.com", "*.example.com"];
        let rule4 = DomainMatchingRule::try_from(domains_slice_str).unwrap();
        assert!(rule4.is_matched("google.com"));
        assert!(rule4.is_matched("test.example.com"));
    }

    #[test]
    fn test_invalid_regex_patterns() {
        // The malformed middle entry is expected to be ignored, not to fail the build.
        let rule = rule_from(&["valid.com", "invalid..domain", "*.valid.org"]);

        assert!(rule.is_matched("valid.com"));
        assert!(rule.is_matched("test.valid.org"));
        assert!(!rule.is_matched("invalid.domain"));
    }

    #[test]
    fn test_empty_input() {
        let empty_rule = DomainMatchingRule::try_from(Vec::<String>::new()).unwrap();

        assert!(!empty_rule.is_matched("google.com"));
        assert!(!empty_rule.is_matched("example.org"));

        assert!(empty_rule.find_suffix_match_all("google.com").is_empty());
        assert_eq!(empty_rule.find_suffix_match_longest("google.com"), None);
        assert!(!empty_rule.find_suffix_match("google.com"));
        assert!(!empty_rule.find_prefix_match("google.com"));
    }

    #[test]
    fn test_complex_multilevel_domains() {
        let rule = rule_from(&[
            "*.api.service.example.com",
            "deep.nested.domain.*",
            "a.b.c.d.e.f.g.com",
        ]);

        assert!(rule.is_matched("v1.api.service.example.com"));
        assert!(rule.is_matched("api.service.example.com"));

        assert!(rule.is_matched("deep.nested.domain.org"));
        assert!(rule.is_matched("deep.nested.domain.co.uk"));

        assert!(rule.is_matched("a.b.c.d.e.f.g.com"));

        assert!(!rule.is_matched("service.example.com"));
        assert!(!rule.is_matched("nested.domain.org"));
    }
}