cloudscraper_rs/modules/events/
mod.rs

//! Event system for the advanced infrastructure.
//!
//! Provides hooks for metrics, logging, and custom reactions around pipeline
//! activity.
5
6use chrono::{DateTime, Utc};
7use http::{HeaderMap, Method};
8use std::sync::Arc;
9use std::time::Duration;
10use url::Url;
11
12use super::metrics::MetricsCollector;
13
14/// Structured pre-request event.
15#[derive(Debug, Clone)]
16pub struct PreRequestEvent {
17    pub url: Url,
18    pub method: Method,
19    pub headers: HeaderMap,
20    pub timestamp: DateTime<Utc>,
21}
22
23/// Structured post-response event.
24#[derive(Debug, Clone)]
25pub struct PostResponseEvent {
26    pub url: Url,
27    pub method: Method,
28    pub status: u16,
29    pub latency: Duration,
30    pub timestamp: DateTime<Utc>,
31}
32
33#[derive(Debug, Clone)]
34pub struct ChallengeEvent {
35    pub domain: String,
36    pub challenge_type: String,
37    pub success: bool,
38    pub metadata: Vec<(String, String)>,
39    pub timestamp: DateTime<Utc>,
40}
41
42#[derive(Debug, Clone)]
43pub struct ErrorEvent {
44    pub domain: String,
45    pub error: String,
46    pub timestamp: DateTime<Utc>,
47}
48
49#[derive(Debug, Clone)]
50pub struct RetryEvent {
51    pub domain: String,
52    pub attempt: u32,
53    pub reason: String,
54    pub scheduled_after: Duration,
55    pub timestamp: DateTime<Utc>,
56}
57
58#[derive(Debug, Clone)]
59pub enum ScraperEvent {
60    PreRequest(PreRequestEvent),
61    PostResponse(PostResponseEvent),
62    Challenge(ChallengeEvent),
63    Error(ErrorEvent),
64    Retry(RetryEvent),
65}
66
67/// Trait implemented by event handlers.
68pub trait EventHandler: Send + Sync {
69    fn handle(&self, event: &ScraperEvent);
70}
71
72/// Dispatcher that broadcasts events to registered handlers.
73#[derive(Default)]
74pub struct EventDispatcher {
75    handlers: Vec<Arc<dyn EventHandler>>,
76}
77
78impl EventDispatcher {
79    pub fn new() -> Self {
80        Self {
81            handlers: Vec::new(),
82        }
83    }
84
85    pub fn register_handler(&mut self, handler: Arc<dyn EventHandler>) {
86        self.handlers.push(handler);
87    }
88
89    pub fn dispatch(&self, event: ScraperEvent) {
90        for handler in &self.handlers {
91            handler.handle(&event);
92        }
93    }
94}
95
96/// Logs events using the `log` crate.
97#[derive(Debug)]
98pub struct LoggingHandler;
99
100impl EventHandler for LoggingHandler {
101    fn handle(&self, event: &ScraperEvent) {
102        match event {
103            ScraperEvent::PreRequest(pre) => {
104                log::debug!("-> {} {}", pre.method, pre.url);
105            }
106            ScraperEvent::PostResponse(post) => {
107                log::debug!(
108                    "<- {} {} -> {} ({:.2}s)",
109                    post.method,
110                    post.url,
111                    post.status,
112                    post.latency.as_secs_f64()
113                );
114            }
115            ScraperEvent::Challenge(challenge) => {
116                log::info!(
117                    "challenge {} ({}) success={}",
118                    challenge.domain,
119                    challenge.challenge_type,
120                    challenge.success
121                );
122            }
123            ScraperEvent::Error(error) => {
124                log::warn!("warning {} -> {}", error.domain, error.error);
125            }
126            ScraperEvent::Retry(retry) => {
127                log::info!(
128                    "retry {} attempt {} after {:.2}s",
129                    retry.domain,
130                    retry.attempt,
131                    retry.scheduled_after.as_secs_f64()
132                );
133            }
134        }
135    }
136}
137
138/// Metrics handler that feeds the metrics collector.
139#[derive(Clone, Debug)]
140pub struct MetricsHandler {
141    metrics: MetricsCollector,
142}
143
144impl MetricsHandler {
145    pub fn new(metrics: MetricsCollector) -> Self {
146        Self { metrics }
147    }
148}
149
150impl EventHandler for MetricsHandler {
151    fn handle(&self, event: &ScraperEvent) {
152        match event {
153            ScraperEvent::PostResponse(post) => {
154                self.metrics.record_response(
155                    post.url.host_str().unwrap_or(""),
156                    post.status,
157                    post.latency,
158                );
159            }
160            ScraperEvent::Error(error) => {
161                self.metrics.record_error(&error.domain);
162            }
163            _ => {}
164        }
165    }
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Test double that only counts how many events it has been handed.
    struct CountingHandler(AtomicUsize);

    impl EventHandler for CountingHandler {
        fn handle(&self, _event: &ScraperEvent) {
            self.0.fetch_add(1, Ordering::SeqCst);
        }
    }

    /// A single dispatched event must reach the registered handler once.
    #[test]
    fn dispatches_to_handlers() {
        let counter = Arc::new(CountingHandler(AtomicUsize::new(0)));

        let mut dispatcher = EventDispatcher::new();
        dispatcher.register_handler(counter.clone());

        let event = ScraperEvent::Error(ErrorEvent {
            domain: "example.com".into(),
            error: "timeout".into(),
            timestamp: Utc::now(),
        });
        dispatcher.dispatch(event);

        assert_eq!(counter.0.load(Ordering::SeqCst), 1);
    }
}