pub struct Crawler<T: Scraper> { /* fields omitted */ }
The crawler that is responsible for driving the requests to completion and
providing the crawl responses to the `Scraper`.
Implementations
Creates a new crawler following the given config.
Whether this crawler respects the domain's robots.txt rules.
Whether non-2xx responses are treated as failures and are not scraped.
Sends a crawling request whose HTML response and context are returned to the scraper again.
Submits a complete crawling job that is driven to completion and returned directly once finished.
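The difference between the two can be sketched as follows; this is an illustrative sketch from inside a `Scraper` implementation, and the exact closure signature and return shape are assumptions rather than the crate's verified API:

```rust
// Hedged sketch: assumes `crawl` takes a closure over an HTTP client and
// routes the resulting response (plus optional state) back into the
// scraper, while `complete` resolves the whole job and returns its result
// directly instead of re-entering the scraper.
crawler.crawl(|client| async move {
    let resp = client.get("https://example.com/page").send().await?;
    // The html response and context are handed back to the scraper.
    Ok((Some(resp), None))
});
```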
This queues in a GET request for the url, without any state attached.
This queues in a GET request for the url, with state attached.
This queues in a whole request with no state attached.
This queues in a whole request with a state attached.
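The four queueing helpers differ only in whether a full request is supplied and whether state travels with it. A hedged sketch of their use; the method names `visit_with_state`/`request_with_state`, the request-builder style, and `MyState` are assumptions for illustration:

```rust
// Plain GET for a url, no state attached:
crawler.visit("https://example.com/start");

// GET for a url, with state handed back alongside the response:
crawler.visit_with_state("https://example.com/start", MyState::Seed);

// A whole pre-built request, no state attached (builder style assumed):
crawler.request(client.get("https://example.com/api").header("Accept", "text/html"));

// A whole pre-built request, with state attached:
crawler.request_with_state(client.get("https://example.com/api"), MyState::Api);
```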