use compact_str::CompactString;
use std::time::Duration;
/// Settings bundle for a crawl.
///
/// Every field is public and can be set directly or through the chainable
/// `with_*` setters on the `impl`. The derived `Default` leaves all flags
/// `false`, `delay` at `0`, and every optional field `None`;
/// `Configuration::new` additionally installs a 15000 ms request timeout.
#[derive(Debug, Default, Clone)]
pub struct Configuration {
/// Flag for honoring `robots.txt`.
/// NOTE(review): enforcement happens outside this file — confirm semantics at the call site.
pub respect_robots_txt: bool,
/// Flag for including sub-domains (presumably of the crawled domain — confirm against the crawler).
pub subdomains: bool,
/// Flag for including other top-level domains (presumably of the same site — confirm against the crawler).
pub tld: bool,
/// Optional list of URL entries to exclude. `get_blacklist` compiles these into a
/// `regex::RegexSet` when the `regex` feature is enabled and returns them verbatim otherwise.
pub blacklist_url: Option<Box<Vec<CompactString>>>,
/// Optional User-Agent value; `None` when unset.
/// NOTE(review): the fallback (likely `get_ua`) is chosen by code outside this file.
pub user_agent: Option<Box<CompactString>>,
/// Delay value, `0` by default.
/// NOTE(review): unit (probably milliseconds) and where it is applied are not visible here — confirm.
pub delay: u64,
/// Optional request timeout. `Configuration::new` sets it to 15000 ms; the derived
/// `Default` leaves it `None`.
pub request_timeout: Option<Box<Duration>>,
/// Flag presumably forwarded to the HTTP client to assume HTTP/2 support — confirm at the call site.
pub http2_prior_knowledge: bool,
/// Optional list of proxy strings.
/// NOTE(review): expected format (URL vs. host:port) is not visible here.
pub proxies: Option<Box<Vec<String>>>,
/// Optional `reqwest` header map (presumably attached to outgoing requests — confirm).
pub headers: Option<Box<reqwest::header::HeaderMap>>,
/// Optional sitemap location; the field only exists when the `sitemap` feature is enabled.
#[cfg(feature = "sitemap")]
pub sitemap_url: Option<Box<CompactString>>,
}
/// Returns the User-Agent string to use.
///
/// This variant is compiled only when the `ua_generator` feature is enabled
/// and hands back whatever `ua_generator::ua::spoof_ua` produces.
#[cfg(feature = "ua_generator")]
pub fn get_ua() -> &'static str {
    ua_generator::ua::spoof_ua()
}
#[cfg(not(any(feature = "ua_generator")))]
pub fn get_ua() -> &'static str {
use std::env;
lazy_static! {
static ref AGENT: &'static str =
concat!(env!("CARGO_PKG_NAME"), '/', env!("CARGO_PKG_VERSION"));
};
AGENT.as_ref()
}
impl Configuration {
    /// Creates a configuration with a 15000 ms request timeout.
    ///
    /// Every other field takes its `Default` value: flags are `false`,
    /// `delay` is `0`, and the remaining optional fields are `None`.
    pub fn new() -> Self {
        Self {
            request_timeout: Some(Box::new(Duration::from_millis(15000))),
            ..Default::default()
        }
    }

    /// Compiles `blacklist_url` into a `regex::RegexSet`.
    ///
    /// Returns the default (empty) set when no blacklist is configured or
    /// when any entry fails to compile as a regular expression; the compile
    /// error is intentionally discarded.
    #[cfg(feature = "regex")]
    pub fn get_blacklist(&self) -> Box<regex::RegexSet> {
        self.blacklist_url
            .as_deref()
            .and_then(|patterns| regex::RegexSet::new(patterns).ok())
            .map(Box::new)
            .unwrap_or_default()
    }

    /// Returns a copy of `blacklist_url`, or an empty list when none is configured.
    #[cfg(not(feature = "regex"))]
    pub fn get_blacklist(&self) -> Box<Vec<CompactString>> {
        self.blacklist_url.clone().unwrap_or_default()
    }

    /// Sets the `respect_robots_txt` flag and returns `self` for chaining.
    pub fn with_respect_robots_txt(&mut self, respect_robots_txt: bool) -> &mut Self {
        self.respect_robots_txt = respect_robots_txt;
        self
    }

    /// Sets the `subdomains` flag and returns `self` for chaining.
    pub fn with_subdomains(&mut self, subdomains: bool) -> &mut Self {
        self.subdomains = subdomains;
        self
    }

    /// Sets the `tld` flag and returns `self` for chaining.
    pub fn with_tld(&mut self, tld: bool) -> &mut Self {
        self.tld = tld;
        self
    }

    /// Sets the `delay` value and returns `self` for chaining.
    ///
    /// NOTE(review): the unit is not visible in this file (probably
    /// milliseconds) — confirm against the code that consumes it.
    pub fn with_delay(&mut self, delay: u64) -> &mut Self {
        self.delay = delay;
        self
    }

    /// Sets the `http2_prior_knowledge` flag and returns `self` for chaining.
    pub fn with_http2_prior_knowledge(&mut self, http2_prior_knowledge: bool) -> &mut Self {
        self.http2_prior_knowledge = http2_prior_knowledge;
        self
    }

    /// Sets the request timeout; `None` clears it. Returns `self` for chaining.
    pub fn with_request_timeout(&mut self, request_timeout: Option<Duration>) -> &mut Self {
        self.request_timeout = request_timeout.map(Box::new);
        self
    }

    /// Sets the sitemap URL; `None` clears it. Returns `self` for chaining.
    ///
    /// Only available when the `sitemap` feature is enabled.
    #[cfg(feature = "sitemap")]
    pub fn with_sitemap(&mut self, sitemap_url: Option<&str>) -> &mut Self {
        // `CompactString::new` takes `&str` directly; no intermediate `String` is needed.
        self.sitemap_url = sitemap_url.map(|url| Box::new(CompactString::new(url)));
        self
    }

    /// Sets the User-Agent override; `None` clears it. Returns `self` for chaining.
    pub fn with_user_agent(&mut self, user_agent: Option<&str>) -> &mut Self {
        // `CompactString::new` takes `&str` directly; no intermediate `String` is needed.
        self.user_agent = user_agent.map(|agent| Box::new(CompactString::new(agent)));
        self
    }

    /// Sets the proxy list; `None` clears it. Returns `self` for chaining.
    pub fn with_proxies(&mut self, proxies: Option<Vec<String>>) -> &mut Self {
        self.proxies = proxies.map(Box::new);
        self
    }

    /// Sets the URL blacklist; `None` clears it. Returns `self` for chaining.
    ///
    /// Accepts any `Vec<T>` that converts into `Vec<CompactString>`.
    pub fn with_blacklist_url<T>(&mut self, blacklist_url: Option<Vec<T>>) -> &mut Self
    where
        Vec<CompactString>: From<Vec<T>>,
    {
        self.blacklist_url =
            blacklist_url.map(|entries| Box::new(Vec::<CompactString>::from(entries)));
        self
    }

    /// Sets the header map; `None` clears it. Returns `self` for chaining.
    pub fn with_headers(&mut self, headers: Option<reqwest::header::HeaderMap>) -> &mut Self {
        self.headers = headers.map(Box::new);
        self
    }
}