pub struct RefererMiddleware {
pub same_origin_only: bool,
pub max_chain_length: usize,
pub include_fragment: bool,
/* private fields */
}
Expand description
Referer middleware that automatically sets `Referer` headers based on the navigation chain.
Fields§
§same_origin_only: bool
Whether to use a same-origin-only referer.
max_chain_length: usize
Maximum referer chain length to keep in memory.
include_fragment: bool
Whether to include the fragment in the referer URL.
Implementations§
Source§impl RefererMiddleware
impl RefererMiddleware
Sourcepub fn new() -> Self
pub fn new() -> Self
Create a new RefererMiddleware with default config
Examples found in repository?
examples/quotes_scraper.rs (line 86)
73 async fn main() -> Result<(), SpiderError> {
74 tracing_subscriber::fmt()
75 .with_env_filter("info,spider_lib=debug")
76 .without_time()
77 .init();
78
79 let crawler = CrawlerBuilder::<_, ReqwestClientDownloader>::new(QuotesSpider)
80 .add_pipeline(DeduplicationPipeline::new(&["text"]))
81 .add_pipeline(CsvExporterPipeline::<QuoteItem>::new("output/quotes.csv")?)
82 .add_middleware(HttpCacheMiddleware::builder().build()?)
83 .add_middleware(UserAgentMiddleware::builder().build()?)
84 .add_middleware(RobotsTxtMiddleware::new())
85 .add_middleware(
86 RefererMiddleware::new()
87 .same_origin_only(true)
88 .max_chain_length(100)
89 .include_fragment(false),
90 )
91 .with_checkpoint_path("output/quotes.bin")
92 .with_checkpoint_interval(Duration::from_secs(15))
93 .max_concurrent_downloads(5)
94 .max_parser_workers(2)
95 .max_concurrent_pipelines(2)
96 .build()
97 .await?;
98
99 crawler.start_crawl().await?;
100
101 Ok(())
102 }
Source
pub fn same_origin_only(self, same_origin_only: bool) -> Self
pub fn same_origin_only(self, same_origin_only: bool) -> Self
Set whether to use same-origin only referer.
Examples found in repository?
examples/quotes_scraper.rs (line 87)
73 async fn main() -> Result<(), SpiderError> {
74 tracing_subscriber::fmt()
75 .with_env_filter("info,spider_lib=debug")
76 .without_time()
77 .init();
78
79 let crawler = CrawlerBuilder::<_, ReqwestClientDownloader>::new(QuotesSpider)
80 .add_pipeline(DeduplicationPipeline::new(&["text"]))
81 .add_pipeline(CsvExporterPipeline::<QuoteItem>::new("output/quotes.csv")?)
82 .add_middleware(HttpCacheMiddleware::builder().build()?)
83 .add_middleware(UserAgentMiddleware::builder().build()?)
84 .add_middleware(RobotsTxtMiddleware::new())
85 .add_middleware(
86 RefererMiddleware::new()
87 .same_origin_only(true)
88 .max_chain_length(100)
89 .include_fragment(false),
90 )
91 .with_checkpoint_path("output/quotes.bin")
92 .with_checkpoint_interval(Duration::from_secs(15))
93 .max_concurrent_downloads(5)
94 .max_parser_workers(2)
95 .max_concurrent_pipelines(2)
96 .build()
97 .await?;
98
99 crawler.start_crawl().await?;
100
101 Ok(())
102 }
Source
pub fn max_chain_length(self, max_chain_length: usize) -> Self
pub fn max_chain_length(self, max_chain_length: usize) -> Self
Set the maximum referer chain length to keep in memory.
Examples found in repository?
examples/quotes_scraper.rs (line 88)
73 async fn main() -> Result<(), SpiderError> {
74 tracing_subscriber::fmt()
75 .with_env_filter("info,spider_lib=debug")
76 .without_time()
77 .init();
78
79 let crawler = CrawlerBuilder::<_, ReqwestClientDownloader>::new(QuotesSpider)
80 .add_pipeline(DeduplicationPipeline::new(&["text"]))
81 .add_pipeline(CsvExporterPipeline::<QuoteItem>::new("output/quotes.csv")?)
82 .add_middleware(HttpCacheMiddleware::builder().build()?)
83 .add_middleware(UserAgentMiddleware::builder().build()?)
84 .add_middleware(RobotsTxtMiddleware::new())
85 .add_middleware(
86 RefererMiddleware::new()
87 .same_origin_only(true)
88 .max_chain_length(100)
89 .include_fragment(false),
90 )
91 .with_checkpoint_path("output/quotes.bin")
92 .with_checkpoint_interval(Duration::from_secs(15))
93 .max_concurrent_downloads(5)
94 .max_parser_workers(2)
95 .max_concurrent_pipelines(2)
96 .build()
97 .await?;
98
99 crawler.start_crawl().await?;
100
101 Ok(())
102 }
Source
pub fn include_fragment(self, include_fragment: bool) -> Self
pub fn include_fragment(self, include_fragment: bool) -> Self
Set whether to include the fragment in the referer URL.
Examples found in repository?
examples/quotes_scraper.rs (line 89)
73 async fn main() -> Result<(), SpiderError> {
74 tracing_subscriber::fmt()
75 .with_env_filter("info,spider_lib=debug")
76 .without_time()
77 .init();
78
79 let crawler = CrawlerBuilder::<_, ReqwestClientDownloader>::new(QuotesSpider)
80 .add_pipeline(DeduplicationPipeline::new(&["text"]))
81 .add_pipeline(CsvExporterPipeline::<QuoteItem>::new("output/quotes.csv")?)
82 .add_middleware(HttpCacheMiddleware::builder().build()?)
83 .add_middleware(UserAgentMiddleware::builder().build()?)
84 .add_middleware(RobotsTxtMiddleware::new())
85 .add_middleware(
86 RefererMiddleware::new()
87 .same_origin_only(true)
88 .max_chain_length(100)
89 .include_fragment(false),
90 )
91 .with_checkpoint_path("output/quotes.bin")
92 .with_checkpoint_interval(Duration::from_secs(15))
93 .max_concurrent_downloads(5)
94 .max_parser_workers(2)
95 .max_concurrent_pipelines(2)
96 .build()
97 .await?;
98
99 crawler.start_crawl().await?;
100
101 Ok(())
102 }
Trait Implementations§
Source§impl Clone for RefererMiddleware
impl Clone for RefererMiddleware
Source§fn clone(&self) -> RefererMiddleware
fn clone(&self) -> RefererMiddleware
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for RefererMiddleware
impl Debug for RefererMiddleware
Source§impl Default for RefererMiddleware
impl Default for RefererMiddleware
Source§impl<C: Send + Sync> Middleware<C> for RefererMiddleware
impl<C: Send + Sync> Middleware<C> for RefererMiddleware
fn name(&self) -> &str
fn process_request<'life0, 'life1, 'async_trait>(
&'life0 mut self,
_client: &'life1 C,
request: Request,
) -> Pin<Box<dyn Future<Output = Result<MiddlewareAction<Request>, SpiderError>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn process_response<'life0, 'async_trait>(
&'life0 mut self,
response: Response,
) -> Pin<Box<dyn Future<Output = Result<MiddlewareAction<Response>, SpiderError>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Auto Trait Implementations§
impl Freeze for RefererMiddleware
impl !RefUnwindSafe for RefererMiddleware
impl Send for RefererMiddleware
impl Sync for RefererMiddleware
impl Unpin for RefererMiddleware
impl !UnwindSafe for RefererMiddleware
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more