// spider_core/engine/context.rs

1//! Shared task context used inside the crawler engine.
2
3use crate::{Scheduler, spider::Spider, stats::StatCollector};
4use spider_pipeline::pipeline::Pipeline;
5use spider_util::item::ScrapedItem;
6use std::sync::Arc;
7
8/// Inner data shared across crawler tasks.
9pub struct CrawlerContextInner<S, I>
10where
11    S: Spider<Item = I>,
12    I: ScrapedItem,
13{
14    pub scheduler: Arc<Scheduler>,
15    pub stats: Arc<StatCollector>,
16    pub spider: Arc<S>,
17    pub spider_state: Arc<S::State>,
18    pub pipelines: Arc<Vec<Box<dyn Pipeline<I>>>>,
19}
20
21/// Cheaply cloneable wrapper around the engine's shared context payload.
22pub struct CrawlerContext<S, I>(pub Arc<CrawlerContextInner<S, I>>)
23where
24    S: Spider<Item = I>,
25    I: ScrapedItem;
26
27impl<S, I> Clone for CrawlerContext<S, I>
28where
29    S: Spider<Item = I>,
30    I: ScrapedItem,
31{
32    fn clone(&self) -> Self {
33        CrawlerContext(Arc::clone(&self.0))
34    }
35}
36
37impl<S, I> CrawlerContext<S, I>
38where
39    S: Spider<Item = I>,
40    I: ScrapedItem,
41{
42    /// Creates a new CrawlerContext with the given components.
43    pub fn new(
44        scheduler: Arc<Scheduler>,
45        stats: Arc<StatCollector>,
46        spider: Arc<S>,
47        spider_state: Arc<S::State>,
48        pipelines: Arc<Vec<Box<dyn Pipeline<I>>>>,
49    ) -> Self {
50        CrawlerContext(Arc::new(CrawlerContextInner {
51            scheduler,
52            stats,
53            spider,
54            spider_state,
55            pipelines,
56        }))
57    }
58
59    /// Creates a CrawlerContext from a Crawler instance.
60    pub fn from_crawler(
61        scheduler: Arc<Scheduler>,
62        stats: Arc<StatCollector>,
63        spider: Arc<S>,
64        spider_state: Arc<S::State>,
65        pipelines: Arc<Vec<Box<dyn Pipeline<I>>>>,
66    ) -> Self {
67        Self::new(scheduler, stats, spider, spider_state, pipelines)
68    }
69}
70
71// Implement Deref for convenient access to inner fields
72impl<S, I> std::ops::Deref for CrawlerContext<S, I>
73where
74    S: Spider<Item = I>,
75    I: ScrapedItem,
76{
77    type Target = CrawlerContextInner<S, I>;
78
79    fn deref(&self) -> &Self::Target {
80        &self.0
81    }
82}