use std::collections::BTreeSet;
use async_trait::async_trait;
use regex::Match;
use crate::{
enums::content::Content,
interfaces::extractor::SubdomainExtractorInterface,
types::core::{Result, Subdomain},
utilities::regex::generate_subdomain_regex,
};
/// Regex-backed subdomain extractor.
///
/// The type has no fields, so every instance is interchangeable; build one
/// with [`Default::default`].
#[derive(Default)]
pub struct RegexExtractor;
impl RegexExtractor {
    /// Returns the first piece of `content` matched by the pattern that
    /// `generate_subdomain_regex` builds for `domain`.
    ///
    /// `content` is taken by value to keep the existing signature; it is only
    /// borrowed while searching.
    ///
    /// Returns `None` when nothing in `content` matches, and also when the
    /// pattern cannot be built for `domain`.
    pub fn extract_one(&self, content: String, domain: &str) -> Option<Subdomain> {
        // A pattern that fails to build means there is nothing to find, so
        // report "no match" instead of panicking on a caller-supplied domain.
        let pattern = generate_subdomain_regex(domain).ok()?;

        pattern
            .find(&content)
            .map(|found: Match| found.as_str().to_string())
    }
}
#[async_trait]
impl SubdomainExtractorInterface for RegexExtractor {
    /// Gathers every match of the `domain` pattern found in `content` into an
    /// ordered, de-duplicated set.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `generate_subdomain_regex` when the
    /// pattern for `domain` cannot be built.
    async fn extract(&self, content: Content, domain: &str) -> Result<BTreeSet<Subdomain>> {
        let regex = generate_subdomain_regex(domain)?;
        let haystack = content.as_string();

        let mut subdomains: BTreeSet<Subdomain> = BTreeSet::new();

        for hit in regex.find_iter(&haystack) {
            // Matches that do not parse into a `Subdomain` are skipped.
            if let Ok(subdomain) = hit.as_str().parse::<Subdomain>() {
                subdomains.insert(subdomain);
            }
        }

        Ok(subdomains)
    }
}