Skip to main content

spider_browser/retry/
failure_tracker.rs

//! Per-domain failure tracking (mirrors server hints.rs FailureTracker).
//!
//! Tracks `(domain, browser_type)` failure counts with a 10-minute TTL.
//! Used by [`BrowserSelector`](super::browser_selector::BrowserSelector) to
//! decide when to rotate browsers.
//!
//! Safe for concurrent use without any external `Mutex`: all state lives in a
//! [`DashMap`], which synchronizes access internally (per-shard locking), and
//! the per-record counters are atomics.
9
10use std::sync::atomic::{AtomicU64, Ordering};
11use std::time::{SystemTime, UNIX_EPOCH};
12
13use dashmap::DashMap;
14
/// Lifetime of a failure record in milliseconds (10 minutes).
/// Mirrors `FAILURE_TTL` in `hints.rs`.
const FAILURE_TTL_MS: u64 = 10 * 60_000;

/// Mirrors `ROTATE_AFTER_FAILURES` in `hints.rs`.
// NOTE(review): presumably the failure count at which the browser selector
// rotates; it is not referenced in this file -- confirm against the caller.
pub const ROTATE_AFTER_FAILURES: u32 = 2;
20
/// One tracked failure entry: a running count plus the time of the latest failure.
///
/// Both fields are atomics so they can be updated through a shared reference.
#[derive(Debug)]
struct FailureRecord {
    /// Failures recorded so far; a freshly created record starts at 1.
    count: AtomicU64,
    /// When the most recent failure happened (ms since UNIX epoch).
    last_failure: AtomicU64,
}

impl FailureRecord {
    /// Create the record for a first failure observed at `timestamp_ms`.
    fn new(timestamp_ms: u64) -> Self {
        let count = AtomicU64::new(1);
        let last_failure = AtomicU64::new(timestamp_ms);
        Self {
            count,
            last_failure,
        }
    }
}
38
39/// Per-domain, per-browser failure tracker with 10-minute TTL.
40///
41/// All methods are `&self` (no `&mut self`), making this safe to share
42/// across tasks via `Arc<FailureTracker>`.
43#[derive(Debug, Default)]
44pub struct FailureTracker {
45    /// Key: `"{domain}::{browser}"` -> failure record.
46    failures: DashMap<String, FailureRecord>,
47}
48
49impl FailureTracker {
50    /// Create a new, empty failure tracker.
51    pub fn new() -> Self {
52        Self {
53            failures: DashMap::new(),
54        }
55    }
56
57    /// Record a failure for a domain + browser pair.
58    pub fn record_failure(&self, domain: &str, browser: &str) {
59        let key = make_key(domain, browser);
60        let now = now_ms();
61
62        // Try to update an existing record first.
63        if let Some(existing) = self.failures.get(&key) {
64            existing.count.fetch_add(1, Ordering::Relaxed);
65            existing.last_failure.store(now, Ordering::Relaxed);
66            return;
67        }
68
69        // Insert a new record. There is a benign race: another thread could
70        // insert between the `get` above and the `entry` below. `or_insert_with`
71        // handles that correctly -- the first writer wins and we just bump the
72        // existing entry.
73        self.failures
74            .entry(key)
75            .and_modify(|rec| {
76                rec.count.fetch_add(1, Ordering::Relaxed);
77                rec.last_failure.store(now, Ordering::Relaxed);
78            })
79            .or_insert_with(|| FailureRecord::new(now));
80    }
81
82    /// Record a success -- clears the failure counter for a domain + browser.
83    pub fn record_success(&self, domain: &str, browser: &str) {
84        self.failures.remove(&make_key(domain, browser));
85    }
86
87    /// Get failure count (0 if expired or not found).
88    pub fn failure_count(&self, domain: &str, browser: &str) -> u32 {
89        let key = make_key(domain, browser);
90        let now = now_ms();
91
92        if let Some(record) = self.failures.get(&key) {
93            let last = record.last_failure.load(Ordering::Relaxed);
94            if now.saturating_sub(last) > FAILURE_TTL_MS {
95                // Expired -- drop the read guard, then remove.
96                drop(record);
97                self.failures.remove(&key);
98                return 0;
99            }
100            record.count.load(Ordering::Relaxed) as u32
101        } else {
102            0
103        }
104    }
105
106    /// Get total failures across all browsers for a domain (unexpired only).
107    pub fn total_failure_count(&self, domain: &str) -> u32 {
108        let prefix = format!("{domain}::");
109        let now = now_ms();
110        let mut total: u32 = 0;
111
112        for entry in self.failures.iter() {
113            if entry.key().starts_with(&prefix) {
114                let last = entry.value().last_failure.load(Ordering::Relaxed);
115                if now.saturating_sub(last) < FAILURE_TTL_MS {
116                    total += entry.value().count.load(Ordering::Relaxed) as u32;
117                }
118            }
119        }
120
121        total
122    }
123
124    /// Clear all failure records for a domain (used on stealth escalation).
125    pub fn clear(&self, domain: &str) {
126        let prefix = format!("{domain}::");
127        // Collect keys to remove -- cannot remove while iterating DashMap.
128        let keys_to_remove: Vec<String> = self
129            .failures
130            .iter()
131            .filter(|entry| entry.key().starts_with(&prefix))
132            .map(|entry| entry.key().clone())
133            .collect();
134
135        for key in keys_to_remove {
136            self.failures.remove(&key);
137        }
138    }
139
140    /// Clean up expired entries across all domains.
141    pub fn cleanup(&self) {
142        let now = now_ms();
143        let keys_to_remove: Vec<String> = self
144            .failures
145            .iter()
146            .filter(|entry| {
147                let last = entry.value().last_failure.load(Ordering::Relaxed);
148                now.saturating_sub(last) > FAILURE_TTL_MS
149            })
150            .map(|entry| entry.key().clone())
151            .collect();
152
153        for key in keys_to_remove {
154            self.failures.remove(&key);
155        }
156    }
157}
158
/// Join `domain` and `browser` into the map key `"{domain}::{browser}"`.
fn make_key(domain: &str, browser: &str) -> String {
    // `concat` sizes the output from the parts, so this is a single allocation.
    [domain, "::", browser].concat()
}
167
/// Milliseconds elapsed since the UNIX epoch, read from the system clock.
///
/// A clock reading earlier than the epoch is reported as `0`.
fn now_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_millis() as u64)
}
175
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a tracker and record one failure per `(domain, browser)` pair, in order.
    fn tracker_with(failures: &[(&str, &str)]) -> FailureTracker {
        let tracker = FailureTracker::new();
        for &(domain, browser) in failures {
            tracker.record_failure(domain, browser);
        }
        tracker
    }

    /// The count starts at zero and grows by one per recorded failure.
    #[test]
    fn record_and_read_failure() {
        let tracker = tracker_with(&[]);
        assert_eq!(tracker.failure_count("example.com", "chrome-h"), 0);

        for expected in 1..=2u32 {
            tracker.record_failure("example.com", "chrome-h");
            assert_eq!(tracker.failure_count("example.com", "chrome-h"), expected);
        }
    }

    /// A success wipes the counter for that domain + browser pair.
    #[test]
    fn record_success_clears() {
        let tracker = tracker_with(&[
            ("example.com", "chrome-h"),
            ("example.com", "chrome-h"),
        ]);
        assert_eq!(tracker.failure_count("example.com", "chrome-h"), 2);

        tracker.record_success("example.com", "chrome-h");
        assert_eq!(tracker.failure_count("example.com", "chrome-h"), 0);
    }

    /// The per-domain total sums every browser and ignores other domains.
    #[test]
    fn total_failure_count_across_browsers() {
        let tracker = tracker_with(&[
            ("example.com", "chrome-h"),
            ("example.com", "chrome-new"),
            ("example.com", "chrome-new"),
            ("other.com", "firefox"),
        ]);

        let expectations = [("example.com", 3), ("other.com", 1), ("missing.com", 0)];
        for &(domain, expected) in expectations.iter() {
            assert_eq!(tracker.total_failure_count(domain), expected);
        }
    }

    /// Clearing a domain drops all of its browsers and nothing else.
    #[test]
    fn clear_domain_removes_all_browsers() {
        let tracker = tracker_with(&[
            ("example.com", "chrome-h"),
            ("example.com", "firefox"),
            ("other.com", "chrome-h"),
        ]);

        tracker.clear("example.com");

        for browser in ["chrome-h", "firefox"].iter() {
            assert_eq!(tracker.failure_count("example.com", browser), 0);
        }
        // other.com untouched
        assert_eq!(tracker.failure_count("other.com", "chrome-h"), 1);
    }

    /// Cleanup leaves records younger than the TTL in place.
    #[test]
    fn cleanup_removes_nothing_when_fresh() {
        let tracker = tracker_with(&[("example.com", "chrome-h")]);
        tracker.cleanup();
        assert_eq!(tracker.failure_count("example.com", "chrome-h"), 1);
    }
}