Struct url_crawler::Crawler
pub struct Crawler { /* fields omitted */ }
A configurable parallel web crawler.
Crawling does not occur until this type is consumed by the crawl method.
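A minimal usage sketch of the builder flow described above. The example URL is illustrative, the `String`-to-`CrawlerSource` conversion and the `Arc`-wrapped closure shape of `PreFetchCallback` are assumptions about this crate's type aliases, and the program requires network access:

```rust
use std::sync::Arc;
use url_crawler::prelude::*;

fn main() {
    // Configuration is pure builder calls; no requests are made until
    // the crawler is consumed by `crawl()`.
    let crawler = Crawler::new("http://apt.pop-os.org/".to_owned())
        .threads(4)
        // Assumed callback shape: descend into directories, fetch .deb files.
        .pre_fetch(Arc::new(|url: &Url| {
            let url = url.as_str();
            url.ends_with('/') || url.ends_with(".deb")
        }))
        .crawl();

    // The iterator yields files as background threads discover them.
    for file in crawler {
        println!("{:#?}", file);
    }
}
```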
Methods

impl Crawler
pub fn new(source: impl Into<CrawlerSource>) -> Self

Initializes a new crawler with a default thread count of 4.
pub fn flags(self, flags: Flags) -> Self

Sets flags for configuring the crawler.
pub fn threads(self, threads: usize) -> Self

Specifies the number of fetcher threads to use.

Notes
- If the input is 0, 1 thread will be used.
- The default thread count is 4 when this method is not used.
pub fn errors(self, errors: ErrorsCallback) -> Self

Sets a callback for handling errors encountered while crawling.
pub fn pre_fetch(self, pre_fetch: PreFetchCallback) -> Self

Enables filtering items based on their filename.

Notes
Returning false from the callback will prevent the item from being fetched.
pub fn post_fetch(self, post_fetch: PostFetchCallback) -> Self

Enables filtering items based on their filename and requested headers.

Notes
Returning false from the callback will prevent the item from being scraped / returned.
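A sketch of the two filtering stages. The `Arc`-wrapped closure shapes of `PreFetchCallback` and `PostFetchCallback` (and the exact header argument type of the latter) are assumptions about this crate's type aliases; the URL is illustrative:

```rust
use std::sync::Arc;
use url_crawler::prelude::*;

let crawler = Crawler::new("http://example.com/".to_owned())
    // Stage 1 (assumed shape): decide from the URL alone whether to
    // fetch at all — returning false skips the request entirely.
    .pre_fetch(Arc::new(|url: &Url| {
        url.as_str().ends_with('/') || url.as_str().ends_with(".tar.gz")
    }))
    // Stage 2 (assumed shape): decide from the URL plus the response
    // headers whether to scrape / yield the item.
    .post_fetch(Arc::new(|_url, _headers| true))
    .crawl();
```

pre_fetch is the cheaper filter, since a rejected item is never requested; post_fetch is for decisions that need server-provided metadata such as content type or length.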
pub fn crawl(self) -> CrawlIter
Initializes the crawling, returning an iterator of discovered files.
The crawler will continue to crawl in background threads even while the iterator is not being pulled from.