use regex::Regex;
/// Configuration for link extraction, assembled through the consuming
/// builder methods on this type.
///
/// This struct only stores settings; the code that applies them to a
/// document is not shown here, so the notes below on how each field is
/// used are assumptions to confirm against that code.
pub struct LinkExtractor {
/// Compiled regular expressions added by the `allow` builder method.
/// Presumably a link must match one of these to be kept — TODO confirm.
pub(super) allow: Vec<Regex>,
/// Compiled regular expressions added by the `deny` builder method.
/// Presumably a link matching any of these is dropped — TODO confirm.
pub(super) deny: Vec<Regex>,
/// CSS selector string set by `restrict_css`, stored verbatim; `None` by default.
/// The selector is not parsed or validated when it is stored.
pub(super) restrict_css: Option<String>,
/// Flag set by `canonicalize`; `false` by default.
/// NOTE(review): looks like it toggles URL canonicalization — confirm.
pub(super) canonicalize: bool,
/// Domain strings accumulated by `allow_domains`; empty by default.
pub(super) allow_domains: Vec<String>,
/// Domain strings accumulated by `deny_domains`; empty by default.
pub(super) deny_domains: Vec<String>,
/// Tag names; defaults to `a` and `area`, replaced wholesale by `tags`.
pub(super) tags: Vec<String>,
/// Attribute names; defaults to `href`, replaced wholesale by `attrs`.
pub(super) attrs: Vec<String>,
}
impl LinkExtractor {
    /// Creates an extractor with the default configuration.
    ///
    /// Defaults: no allow/deny patterns, no CSS restriction, canonicalization
    /// disabled, no domain lists, tags `a` and `area`, attribute `href`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            allow: vec![],
            deny: vec![],
            restrict_css: None,
            canonicalize: false,
            allow_domains: vec![],
            deny_domains: vec![],
            tags: vec!["a".into(), "area".into()],
            attrs: vec!["href".into()],
        }
    }

    /// Compiles `pattern` as a regular expression and appends it to the
    /// allow list. Repeated calls accumulate patterns.
    ///
    /// # Panics
    ///
    /// Panics if `pattern` is not a valid regular expression.
    #[must_use = "builder methods consume `self`; use the returned extractor"]
    pub fn allow(mut self, pattern: &str) -> Self {
        let compiled = Regex::new(pattern)
            .unwrap_or_else(|e| panic!("invalid allow pattern '{pattern}': {e}"));
        self.allow.push(compiled);
        self
    }

    /// Compiles `pattern` as a regular expression and appends it to the
    /// deny list. Repeated calls accumulate patterns.
    ///
    /// # Panics
    ///
    /// Panics if `pattern` is not a valid regular expression.
    #[must_use = "builder methods consume `self`; use the returned extractor"]
    pub fn deny(mut self, pattern: &str) -> Self {
        let compiled = Regex::new(pattern)
            .unwrap_or_else(|e| panic!("invalid deny pattern '{pattern}': {e}"));
        self.deny.push(compiled);
        self
    }

    /// Stores `selector` as the CSS restriction, replacing any previous one.
    ///
    /// The selector is stored verbatim; it is not parsed or validated here.
    #[must_use = "builder methods consume `self`; use the returned extractor"]
    pub fn restrict_css(mut self, selector: &str) -> Self {
        self.restrict_css = Some(selector.to_string());
        self
    }

    /// Sets the `canonicalize` flag to `enabled`.
    #[must_use = "builder methods consume `self`; use the returned extractor"]
    pub fn canonicalize(mut self, enabled: bool) -> Self {
        self.canonicalize = enabled;
        self
    }

    /// Appends `domains` to the allowed-domain list.
    ///
    /// Unlike [`tags`](Self::tags) and [`attrs`](Self::attrs), this extends
    /// the existing list rather than replacing it.
    #[must_use = "builder methods consume `self`; use the returned extractor"]
    pub fn allow_domains(mut self, domains: &[&str]) -> Self {
        self.allow_domains
            .extend(domains.iter().map(|d| d.to_string()));
        self
    }

    /// Appends `domains` to the denied-domain list.
    ///
    /// Unlike [`tags`](Self::tags) and [`attrs`](Self::attrs), this extends
    /// the existing list rather than replacing it.
    #[must_use = "builder methods consume `self`; use the returned extractor"]
    pub fn deny_domains(mut self, domains: &[&str]) -> Self {
        self.deny_domains
            .extend(domains.iter().map(|d| d.to_string()));
        self
    }

    /// Replaces the tag list with `tags`, discarding the defaults
    /// (`a`, `area`) and anything set by an earlier call.
    #[must_use = "builder methods consume `self`; use the returned extractor"]
    pub fn tags(mut self, tags: &[&str]) -> Self {
        self.tags = tags.iter().map(|t| t.to_string()).collect();
        self
    }

    /// Replaces the attribute list with `attrs`, discarding the default
    /// (`href`) and anything set by an earlier call.
    #[must_use = "builder methods consume `self`; use the returned extractor"]
    pub fn attrs(mut self, attrs: &[&str]) -> Self {
        self.attrs = attrs.iter().map(|a| a.to_string()).collect();
        self
    }
}
impl Default for LinkExtractor {
fn default() -> Self {
Self::new()
}
}