use aho_corasick::AhoCorasick;
use rustc_hash::FxHashSet;
/// Matches host names against exact-domain and domain-suffix rules.
///
/// All comparisons are ASCII case-insensitive: rules are lowercased when they
/// are added and queries are lowercased before lookup.
#[derive(Debug, Default)]
pub struct DomainMatcher {
    /// Lowercased names that match only when the whole queried name is equal.
    exact: FxHashSet<String>,
    /// Lowercased suffixes, stored once and without a leading dot. An entry
    /// matches the name itself and any name ending in `.` plus the entry.
    suffixes: FxHashSet<String>,
}

impl DomainMatcher {
    /// Creates a matcher with no rules.
    pub fn new() -> Self {
        Self::default()
    }

    /// Adds a rule that matches `domain` itself and nothing else.
    pub fn add_exact(&mut self, domain: &str) {
        self.exact.insert(domain.to_ascii_lowercase());
    }

    /// Adds a rule that matches `suffix` itself and every subdomain of it.
    ///
    /// A single leading dot is ignored, so `"example.com"` and
    /// `".example.com"` add the same rule.
    pub fn add_suffix(&mut self, suffix: &str) {
        let bare = suffix.strip_prefix('.').unwrap_or(suffix);
        self.suffixes.insert(bare.to_ascii_lowercase());
    }

    /// Returns `true` if `domain` is covered by any exact or suffix rule.
    pub fn matches(&self, domain: &str) -> bool {
        let lower = Self::lowercased(domain);
        let lower: &str = &lower;

        // The full name may be an exact rule or the apex of a suffix rule.
        if self.exact.contains(lower) || self.suffixes.contains(lower) {
            return true;
        }

        // Only whole labels may match a suffix rule, so test the remainder
        // after each dot rather than arbitrary string suffixes
        // (`notapple.com` must not match `apple.com`).
        lower
            .match_indices('.')
            .any(|(dot, _)| self.suffixes.contains(&lower[dot + 1..]))
    }

    /// Returns `true` if no rule has been added.
    pub fn is_empty(&self) -> bool {
        self.exact.is_empty() && self.suffixes.is_empty()
    }

    /// Returns the number of distinct rules: exact rules plus suffix rules.
    ///
    /// Each suffix rule counts once; the same name added both as an exact
    /// rule and as a suffix rule counts as two rules.
    pub fn len(&self) -> usize {
        self.exact.len() + self.suffixes.len()
    }

    /// Lowercases `domain`, borrowing it unchanged when it contains no ASCII
    /// uppercase letters so that the common lookup path does not allocate.
    fn lowercased(domain: &str) -> std::borrow::Cow<'_, str> {
        if domain.bytes().any(|b| b.is_ascii_uppercase()) {
            std::borrow::Cow::Owned(domain.to_ascii_lowercase())
        } else {
            std::borrow::Cow::Borrowed(domain)
        }
    }
}
/// Matches host names that contain any of a set of keywords as a substring.
///
/// Matching is ASCII case-insensitive: keywords are lowercased at
/// construction and every queried name is lowercased before searching.
pub struct KeywordMatcher {
    /// Automaton built from the lowercased keywords.
    ac: AhoCorasick,
    /// The lowercased, non-empty keywords the automaton was built from; kept
    /// for `len`, `is_empty` and the `Debug` output.
    keywords: Vec<String>,
}

impl KeywordMatcher {
    /// Builds a matcher from `keywords`.
    ///
    /// Empty keywords are discarded: an empty pattern matches at every
    /// position of every haystack, so a single blank entry would otherwise
    /// make the matcher accept every domain.
    ///
    /// Returns `None` when no non-empty keyword remains.
    ///
    /// # Panics
    ///
    /// Panics if the automaton cannot be built from the keywords.
    pub fn new(keywords: Vec<String>) -> Option<Self> {
        // The input is owned, so lowercase in place instead of cloning.
        let lower: Vec<String> = keywords
            .into_iter()
            .filter(|keyword| !keyword.is_empty())
            .map(|mut keyword| {
                keyword.make_ascii_lowercase();
                keyword
            })
            .collect();
        if lower.is_empty() {
            return None;
        }

        let ac = AhoCorasick::new(&lower).expect("valid patterns");
        Some(Self {
            ac,
            keywords: lower,
        })
    }

    /// Returns `true` if `domain` contains at least one keyword, ignoring
    /// ASCII case.
    pub fn matches(&self, domain: &str) -> bool {
        let lower = domain.to_ascii_lowercase();
        self.ac.is_match(&lower)
    }

    /// Returns the number of keywords held by the matcher.
    pub fn len(&self) -> usize {
        self.keywords.len()
    }

    /// Returns `true` if the matcher holds no keywords.
    pub fn is_empty(&self) -> bool {
        self.keywords.is_empty()
    }
}

impl std::fmt::Debug for KeywordMatcher {
    /// Formats the matcher showing only its keyword list; the automaton is
    /// left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("KeywordMatcher")
            .field("keywords", &self.keywords)
            .finish()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An exact rule covers only the full name, in any letter case.
    #[test]
    fn domain_exact_match() {
        let mut matcher = DomainMatcher::new();
        matcher.add_exact("api.example.com");

        assert!(matcher.matches("api.example.com"));
        // Lookups ignore ASCII case.
        assert!(matcher.matches("API.EXAMPLE.COM"));
        // Neither the parent nor a child of the exact name is covered.
        assert!(!matcher.matches("example.com"));
        assert!(!matcher.matches("other.api.example.com"));
    }

    /// A suffix rule covers the name itself and every subdomain of it.
    #[test]
    fn domain_suffix_match() {
        let mut matcher = DomainMatcher::new();
        matcher.add_suffix("apple.com");

        // The apex and subdomains at any depth match.
        assert!(matcher.matches("apple.com"));
        assert!(matcher.matches("store.apple.com"));
        assert!(matcher.matches("cdn.store.apple.com"));
        // A name that merely ends with the same characters does not.
        assert!(!matcher.matches("notapple.com"));
        // Nor does the parent of the suffix.
        assert!(!matcher.matches("com"));
    }

    /// Exact and suffix rules can live side by side in one matcher.
    #[test]
    fn domain_mixed() {
        let mut matcher = DomainMatcher::new();
        matcher.add_exact("specific.example.com");
        matcher.add_suffix("google.com");

        assert!(matcher.matches("specific.example.com"));
        assert!(!matcher.matches("other.example.com"));
        assert!(matcher.matches("google.com"));
        assert!(matcher.matches("mail.google.com"));
    }

    /// A matcher without rules matches nothing and reports itself empty.
    #[test]
    fn domain_empty() {
        let matcher = DomainMatcher::new();

        assert!(!matcher.matches("anything.com"));
        assert!(matcher.is_empty());
    }

    /// Any of several keywords may appear anywhere in the name.
    #[test]
    fn keyword_match() {
        let keywords = vec!["google".into(), "facebook".into()];
        let matcher = KeywordMatcher::new(keywords).unwrap();

        assert!(matcher.matches("www.google.com"));
        assert!(matcher.matches("api.facebook.com"));
        // Keyword search ignores ASCII case.
        assert!(matcher.matches("GOOGLE.co.jp"));
        assert!(!matcher.matches("www.apple.com"));
    }

    /// No keywords means no matcher is built.
    #[test]
    fn keyword_empty() {
        let built = KeywordMatcher::new(vec![]);

        assert!(built.is_none());
    }

    /// A single keyword matches as a substring, not only as a whole label.
    #[test]
    fn keyword_single() {
        let matcher = KeywordMatcher::new(vec!["ads".into()]).unwrap();

        assert!(matcher.matches("ads.example.com"));
        assert!(matcher.matches("example-ads.com"));
        assert!(!matcher.matches("example.com"));
    }

    /// A suffix written with a leading dot behaves like the bare suffix.
    #[test]
    fn domain_suffix_leading_dot_normalized() {
        let mut matcher = DomainMatcher::new();
        matcher.add_suffix(".example.com");

        assert!(matcher.matches("example.com"));
        assert!(matcher.matches("sub.example.com"));
        assert!(!matcher.matches("notexample.com"));
    }

    /// Adding a suffix with or without the leading dot gives matchers that
    /// agree on every probe, whether or not the probe is covered.
    #[test]
    fn domain_suffix_with_and_without_dot_equivalent() {
        let mut bare = DomainMatcher::new();
        bare.add_suffix("apple.com");
        let mut dotted = DomainMatcher::new();
        dotted.add_suffix(".apple.com");

        let covered = ["apple.com", "store.apple.com", "cdn.store.apple.com"];
        let uncovered = ["notapple.com", "com"];
        for domain in covered.iter().chain(uncovered.iter()) {
            assert_eq!(
                bare.matches(domain),
                dotted.matches(domain),
                "mismatch for {domain}"
            );
        }
    }
}