pub struct Website {
pub configuration: Box<Configuration>,
pub on_link_find_callback: Option<fn(_: CaseInsensitiveString) -> CaseInsensitiveString>,
/* private fields */
}
Represents a website to crawl and gather all links.
use spider::website::Website;
let mut website = Website::new("http://example.com");
website.crawl();
// `Website` will be filled with `Pages` when crawled. To get them, just use `get_pages`:
while let Some(page) = website.get_pages() {
    // do something
}

Fields
configuration: Box<Configuration>
Configuration properties for the website.
on_link_find_callback: Option<fn(_: CaseInsensitiveString) -> CaseInsensitiveString>
Callback invoked when a link is found.
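A minimal sketch of wiring up the public on_link_find_callback field declared above; the spider::CaseInsensitiveString import path is an assumption, and the callback body is only a placeholder:

use spider::website::Website;
use spider::CaseInsensitiveString; // re-export path assumed

// inspect or rewrite the link here, then hand it back to the crawler
fn on_link(link: CaseInsensitiveString) -> CaseInsensitiveString {
    link
}

let mut website = Website::new("http://example.com");
website.on_link_find_callback = Some(on_link);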
Implementations
impl Website
pub fn is_allowed(
    &self,
    link: &CaseInsensitiveString,
    blacklist_url: &Box<Vec<CompactString>>
) -> bool
Returns true if the URL:
- has not already been crawled
- is not blacklisted
- is not forbidden by the robots.txt file (if that parameter is enabled)
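A minimal sketch of calling is_allowed directly; the import paths for CaseInsensitiveString and CompactString and their From<&str> conversions are assumptions, and the blacklist would normally come from the crawl configuration rather than being built inline:

use spider::website::Website;
use spider::CaseInsensitiveString; // re-export path assumed
use spider::CompactString;         // re-export path assumed

let website = Website::new("http://example.com");
let blacklist: Box<Vec<CompactString>> =
    Box::new(vec![CompactString::from("http://example.com/login")]);
let link = CaseInsensitiveString::from("http://example.com/about");
if website.is_allowed(&link, &blacklist) {
    // not visited yet, not blacklisted, and not blocked by robots.txt
}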
pub fn is_allowed_default(
    &self,
    link: &CompactString,
    blacklist_url: &Box<Vec<CompactString>>
) -> bool
Returns true if the URL:
- is not blacklisted
- is not forbidden by the robots.txt file (if that parameter is enabled)
pub fn is_allowed_robots(&self, link: &str) -> bool
Returns true if the URL:
- is not forbidden by the robots.txt file (if that parameter is enabled)
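A minimal sketch of the two lighter checks above, with the same assumed import paths as before; is_allowed_robots takes a plain &str, so no conversion is needed for it:

let website = Website::new("http://example.com");
let blacklist: Box<Vec<CompactString>> = Box::new(Vec::new());
let link = CompactString::from("http://example.com/pricing");
// skips the visited-links check that is_allowed performs
let default_ok = website.is_allowed_default(&link, &blacklist);
// only consults the robots.txt rules
let robots_ok = website.is_allowed_robots("http://example.com/pricing");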
pub fn get_links(&self) -> &HashSet<CaseInsensitiveString>
Getter for the links that have been visited.
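A minimal sketch of reading the visited set after a crawl, mirroring the crawl call from the example at the top of this page (whether crawl must be awaited depends on the crate version):

let mut website = Website::new("http://example.com");
website.crawl();
for link in website.get_links() {
    // each entry is a visited URL stored as a CaseInsensitiveString
}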
pub fn get_absolute_path(&self, domain: Option<&str>) -> Option<Url>
Absolute base URL of the crawl.
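A minimal sketch based on the signature above; that the Option<&str> argument resolves against an alternate domain instead of the website's own is an assumption drawn from the parameter name:

let website = Website::new("http://example.com/blog/");
if let Some(base) = website.get_absolute_path(None) {
    // base is the Url the crawl resolves relative links against
}
let other_base = website.get_absolute_path(Some("http://example.com/docs/"));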
pub async fn configure_robots_parser(&mut self, client: Client) -> Client
Configure the robots.txt parser on the initial crawl attempt and run.
pub fn configure_http_client(&mut self) -> Client
Configure the HTTP client.
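A minimal sketch chaining the two configuration methods above, assuming a tokio runtime since configure_robots_parser is async; only signatures shown on this page are used:

use spider::website::Website;

#[tokio::main]
async fn main() {
    let mut website = Website::new("http://example.com");
    let client = website.configure_http_client();
    // hand the client to the robots parser; the configured client is returned for later requests
    let client = website.configure_robots_parser(client).await;
}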
pub async fn crawl_sync(&mut self)
Start to crawl the website in sync.
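A minimal sketch of driving crawl_sync; a tokio runtime is assumed because the method is async:

use spider::website::Website;

#[tokio::main]
async fn main() {
    let mut website = Website::new("http://example.com");
    // run the crawl in sync; the method is async, so it still needs to be awaited
    website.crawl_sync().await;
    // the visited links are then available via get_links / get_pages
}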
Trait Implementations
Auto Trait Implementations
impl RefUnwindSafe for Website
impl Send for Website
impl Sync for Website
impl Unpin for Website
impl UnwindSafe for Website
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.