pub struct Crawler<S: Spider, C> { /* private fields */ }

Implementations
impl<S, C> Crawler<S, C>
pub async fn start_crawl(self) -> Result<(), SpiderError>
Starts the crawl. This consumes the crawler; the returned future resolves with Ok(()) once crawling has finished, or with a SpiderError if it fails.
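A minimal sketch of the call pattern, shown before the full repository example. Everything here is an assumption except the names taken from that example: MySpider is a hypothetical Spider implementation (definition omitted), the spider_lib import paths are inferred from the example's log filter, and #[tokio::main] is a guess at the async runtime.

use spider_lib::{CrawlerBuilder, ReqwestClientDownloader, SpiderError};

#[tokio::main]
async fn main() -> Result<(), SpiderError> {
    // Build a crawler from a spider and the Reqwest-based downloader;
    // the repository example below adds pipelines, middlewares,
    // checkpointing, and concurrency settings at this stage.
    let crawler = CrawlerBuilder::<_, ReqwestClientDownloader>::new(MySpider)
        .build()
        .await?;

    // start_crawl takes self by value, so the crawler is consumed here.
    crawler.start_crawl().await?;
    Ok(())
}

The repository example below performs the same two steps, calling start_crawl on line 99 after configuring item pipelines, middlewares, a checkpoint file, and concurrency limits.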
Examples found in repository
examples/quotes_scraper.rs (line 99)
73   async fn main() -> Result<(), SpiderError> {
74       tracing_subscriber::fmt()
75           .with_env_filter("info,spider_lib=debug")
76           .without_time()
77           .init();
78
79       let crawler = CrawlerBuilder::<_, ReqwestClientDownloader>::new(QuotesSpider)
80           .add_pipeline(DeduplicationPipeline::new(&["text"]))
81           .add_pipeline(CsvExporterPipeline::<QuoteItem>::new("output/quotes.csv")?)
82           .add_middleware(HttpCacheMiddleware::builder().build()?)
83           .add_middleware(UserAgentMiddleware::builder().build()?)
84           .add_middleware(RobotsTxtMiddleware::new())
85           .add_middleware(
86               RefererMiddleware::new()
87                   .same_origin_only(true)
88                   .max_chain_length(100)
89                   .include_fragment(false),
90           )
91           .with_checkpoint_path("output/quotes.bin")
92           .with_checkpoint_interval(Duration::from_secs(15))
93           .max_concurrent_downloads(5)
94           .max_parser_workers(2)
95           .max_concurrent_pipelines(2)
96           .build()
97           .await?;
98
99       crawler.start_crawl().await?;
100
101      Ok(())
102  }

Auto Trait Implementations
impl<S, C> Freeze for Crawler<S, C>
impl<S, C> !RefUnwindSafe for Crawler<S, C>
impl<S, C> Send for Crawler<S, C>
impl<S, C> Sync for Crawler<S, C>
impl<S, C> Unpin for Crawler<S, C>
impl<S, C> !UnwindSafe for Crawler<S, C>
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.