Struct spider::website::Website

source ·
pub struct Website {
    pub configuration: Box<Configuration>,
    pub on_link_find_callback: Option<fn(_: CaseInsensitiveString) -> CaseInsensitiveString>,
    /* private fields */
}
Expand description

Represents a website to crawl and gather all links.

use spider::website::Website;
let mut website = Website::new("http://example.com");
website.crawl();
// `Website` will be filled with `Pages` when crawled. To get them, just use
while let Some(page) = website.get_pages() {
    // do something
}

Fields§

§configuration: Box<Configuration>

configuration properties for website.

§on_link_find_callback: Option<fn(_: CaseInsensitiveString) -> CaseInsensitiveString>

callback when a link is found.

Implementations§

source§

impl Website

source

pub fn new(domain: &str) -> Self

Initialize Website object with a start link to crawl.

source

pub fn is_allowed( &self, link: &CaseInsensitiveString, blacklist_url: &Box<Vec<CompactString>> ) -> bool

return true if URL:

  • is not already crawled
  • is not blacklisted
  • is not forbidden in robots.txt file (if parameter is defined)
source

pub fn is_allowed_default( &self, link: &CompactString, blacklist_url: &Box<Vec<CompactString>> ) -> bool

return true if URL:

  • is not blacklisted
  • is not forbidden in robots.txt file (if parameter is defined)
source

pub fn is_allowed_robots(&self, link: &str) -> bool

return true if URL:

  • is not forbidden in robots.txt file (if parameter is defined)
source

pub fn get_pages(&self) -> Option<&Box<Vec<Page>>>

page getter

links visited getter

source

pub fn get_absolute_path(&self, domain: Option<&str>) -> Option<Url>

absolute base url of crawl

source

pub async fn configure_robots_parser(&mut self, client: Client) -> Client

configure the robots parser on initial crawl attempt and run.

source

pub fn configure_http_client(&mut self) -> Client

configure http client

source

pub async fn setup<T>(&mut self) -> (Client, Option<T>)

setup config for crawl

source

pub async fn crawl(&mut self)

Start to crawl website with async concurrency

source

pub async fn crawl_sync(&mut self)

Start to crawl website in sync

source

pub async fn scrape(&mut self)

Start to scrape/download website with async concurrency

Trait Implementations§

source§

impl Clone for Website

source§

fn clone(&self) -> Website

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for Website

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for T where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more