/// Boxed, type-erased predicate over a URL string slice.
///
/// The `Send + Sync` bounds allow a value holding this closure to be moved to
/// or shared between threads.
// NOTE(review): presumably returning `true` keeps the URL and `false` drops
// it — confirm against the code that invokes the filter (not visible here).
type UrlFilter = Box<dyn Fn(&str) -> bool + Send + Sync>;
/// Spider configuration holding an entry-point URL and an optional URL
/// predicate.
///
/// Built through `new`, `with_sitemap`, or `from_robots`, and optionally
/// refined with the chainable `filter_url` method.
pub struct SitemapSpider {
/// Entry-point URL. `new` stores `<base>/sitemap.xml`, `with_sitemap` stores
/// the caller's URL unchanged, and `from_robots` stores `<base>/robots.txt`.
pub(super) sitemap_url: String,
/// Optional predicate installed via `filter_url`; `None` until then.
// NOTE(review): how this predicate is applied is not visible in this file
// section — confirm against the crawling code.
pub(super) filter_url: Option<UrlFilter>,
}
impl SitemapSpider {
    /// Creates a spider pointed at `<base_url>/sitemap.xml`.
    ///
    /// All trailing `/` characters are stripped from `base_url` before the
    /// path is appended, so `"https://a.com"` and `"https://a.com//"` both
    /// give `"https://a.com/sitemap.xml"`. No URL filter is installed.
    pub fn new(base_url: impl Into<String>) -> Self {
        let root: String = base_url.into();
        let trimmed = root.trim_end_matches('/');
        Self::with_sitemap(format!("{}/sitemap.xml", trimmed))
    }

    /// Creates a spider from an explicit URL, stored exactly as given.
    ///
    /// No normalisation is performed and no URL filter is installed.
    pub fn with_sitemap(sitemap_url: impl Into<String>) -> Self {
        let sitemap_url: String = sitemap_url.into();
        Self {
            sitemap_url,
            filter_url: None,
        }
    }

    /// Creates a spider pointed at `<base_url>/robots.txt`.
    ///
    /// All trailing `/` characters are stripped from `base_url` before the
    /// path is appended. No URL filter is installed.
    // NOTE(review): the robots.txt URL is stored in the `sitemap_url` field;
    // presumably the fetching code resolves it to the real sitemap(s) —
    // confirm, since that logic is not visible here.
    pub fn from_robots(base_url: impl Into<String>) -> Self {
        let root: String = base_url.into();
        let trimmed = root.trim_end_matches('/');
        Self::with_sitemap(format!("{}/robots.txt", trimmed))
    }

    /// Installs a URL predicate, replacing any previously set one, and
    /// returns the spider so calls can be chained.
    ///
    /// The closure must be `Send + Sync + 'static` because it is boxed and
    /// stored inside the spider.
    pub fn filter_url(mut self, f: impl Fn(&str) -> bool + Send + Sync + 'static) -> Self {
        let predicate: Box<dyn Fn(&str) -> bool + Send + Sync> = Box::new(f);
        self.filter_url = Some(predicate);
        self
    }
}