Skip to main content

spider_core/engine/
context.rs

1//! Aggregated context shared across all crawler tasks.
2//!
3//! This module provides the `CrawlerContext` struct, which bundles together
4//! all the shared state that needs to be passed between different crawler tasks.
5//! By wrapping this context in an Arc, we can clone it cheaply (just incrementing
6//! the reference count) instead of cloning each individual component.
7//!
8//! ## Benefits
9//!
10//! - **Reduced cloning overhead**: Single Arc::clone() instead of 5+ individual clones
11//! - **Better code organization**: Related state is grouped together
12//! - **Easier refactoring**: Adding new shared state only requires one change
13
14use std::sync::Arc;
15use crate::{Scheduler, stats::StatCollector, spider::Spider};
16use spider_pipeline::pipeline::Pipeline;
17use spider_util::item::ScrapedItem;
18
19/// Aggregated context shared across all crawler tasks.
20///
21/// This struct bundles together all the Arc-wrapped components that need to be
22/// shared between the crawler's various async tasks. Instead of cloning each
23/// Arc individually, we can clone this context with a single Arc::clone().
24pub struct CrawlerContext<S, I>
25where
26    S: Spider<Item = I>,
27    I: ScrapedItem,
28{
29    pub scheduler: Arc<Scheduler>,
30    pub stats: Arc<StatCollector>,
31    pub spider: Arc<S>,
32    pub spider_state: Arc<S::State>,
33    pub pipelines: Arc<Vec<Box<dyn Pipeline<I>>>>,
34}
35
36impl<S, I> Clone for CrawlerContext<S, I>
37where
38    S: Spider<Item = I>,
39    I: ScrapedItem,
40{
41    fn clone(&self) -> Self {
42        Self {
43            scheduler: Arc::clone(&self.scheduler),
44            stats: Arc::clone(&self.stats),
45            spider: Arc::clone(&self.spider),
46            spider_state: Arc::clone(&self.spider_state),
47            pipelines: Arc::clone(&self.pipelines),
48        }
49    }
50}
51
52impl<S, I> CrawlerContext<S, I>
53where
54    S: Spider<Item = I>,
55    I: ScrapedItem,
56{
57    /// Creates a new CrawlerContext with the given components.
58    pub fn new(
59        scheduler: Arc<Scheduler>,
60        stats: Arc<StatCollector>,
61        spider: Arc<S>,
62        spider_state: Arc<S::State>,
63        pipelines: Arc<Vec<Box<dyn Pipeline<I>>>>,
64    ) -> Self {
65        Self {
66            scheduler,
67            stats,
68            spider,
69            spider_state,
70            pipelines,
71        }
72    }
73
74    /// Creates a CrawlerContext from a Crawler instance.
75    pub fn from_crawler<C>(
76        scheduler: Arc<Scheduler>,
77        stats: Arc<StatCollector>,
78        spider: Arc<S>,
79        spider_state: Arc<S::State>,
80        pipelines: Arc<Vec<Box<dyn Pipeline<I>>>>,
81    ) -> Self {
82        Self::new(scheduler, stats, spider, spider_state, pipelines)
83    }
84}