Skip to main content

spider_browser/retry/
browser_selector.rs

1//! Browser selection and rotation logic.
2//!
3//! Mirrors the server's `hints.rs` browser rotation:
4//! 1. Try the current browser until [`ROTATE_AFTER_FAILURES`] consecutive failures.
5//! 2. Move to the next browser in [`BROWSER_ROTATION`] order.
6//! 3. Skip browsers that have also exceeded the failure threshold.
7
8use super::failure_tracker::{FailureTracker, ROTATE_AFTER_FAILURES};
9
/// Primary browser rotation -- most stable Chrome backends.
///
/// `chrome-h` (ChromeXvfb) is the most reliable (99%), followed by
/// `chrome-new` (92%). Shared chrome is excluded -- only 35-65% reliable.
pub const PRIMARY_ROTATION: &[&str] = &["chrome-h", "chrome-new"];

/// Extended browser rotation -- non-Chrome engines tried at max stealth only.
pub const EXTENDED_ROTATION: &[&str] = &["firefox", "lightpanda", "servo"];

/// Total number of entries in the full rotation (primary + extended).
const FULL_ROTATION_LEN: usize = PRIMARY_ROTATION.len() + EXTENDED_ROTATION.len();

/// Backing array for [`BROWSER_ROTATION`], assembled at compile time.
///
/// Built from [`PRIMARY_ROTATION`] followed by [`EXTENDED_ROTATION`] so the
/// full list cannot drift out of sync with its two parts. `while` loops are
/// used because iterators are not available in `const` evaluation.
const FULL_ROTATION: [&str; FULL_ROTATION_LEN] = {
    let mut all = [""; FULL_ROTATION_LEN];

    let mut i = 0;
    while i < PRIMARY_ROTATION.len() {
        all[i] = PRIMARY_ROTATION[i];
        i += 1;
    }

    let mut j = 0;
    while j < EXTENDED_ROTATION.len() {
        // `i` now equals `PRIMARY_ROTATION.len()`, i.e. the first free slot.
        all[i + j] = EXTENDED_ROTATION[j];
        j += 1;
    }

    all
};

/// Full browser rotation order for retry/failover: primary then extended.
///
/// Derived from [`PRIMARY_ROTATION`] and [`EXTENDED_ROTATION`]; edit those
/// lists rather than this one.
pub const BROWSER_ROTATION: &[&str] = &FULL_ROTATION;
27
28/// Picks the next browser in rotation based on per-domain failure history.
29///
30/// Follows the server's `hints.rs` logic:
31/// 1. Try current browser until `ROTATE_AFTER_FAILURES` consecutive failures.
32/// 2. Then move to the next browser in `BROWSER_ROTATION` order.
33/// 3. Skip browsers that have also exceeded the failure threshold.
34pub struct BrowserSelector {
35    tracker: FailureTracker,
36}
37
38impl BrowserSelector {
39    /// Create a new selector backed by the given failure tracker.
40    pub fn new(tracker: FailureTracker) -> Self {
41        Self { tracker }
42    }
43
44    /// Borrow the underlying failure tracker.
45    pub fn failure_tracker(&self) -> &FailureTracker {
46        &self.tracker
47    }
48
49    /// Check if the current browser should be rotated for a domain.
50    ///
51    /// Returns `true` when the failure count for `(domain, current_browser)`
52    /// meets or exceeds [`ROTATE_AFTER_FAILURES`].
53    pub fn should_rotate(&self, domain: &str, current_browser: &str) -> bool {
54        self.tracker.failure_count(domain, current_browser) >= ROTATE_AFTER_FAILURES
55    }
56
57    /// Pick the next browser to try after `current_browser` has failed.
58    ///
59    /// Walks [`BROWSER_ROTATION`] starting from the position after
60    /// `current_browser`, wrapping around, and returns the first candidate
61    /// that has not exceeded the failure threshold for `domain`.
62    ///
63    /// Returns `None` if every browser in the rotation has been exhausted.
64    pub fn next_browser(&self, domain: &str, current_browser: &str) -> Option<&'static str> {
65        let current_idx = BROWSER_ROTATION
66            .iter()
67            .position(|&b| b == current_browser)
68            .unwrap_or(0);
69
70        for offset in 1..BROWSER_ROTATION.len() {
71            let idx = (current_idx + offset) % BROWSER_ROTATION.len();
72            let candidate = BROWSER_ROTATION[idx];
73            if self.tracker.failure_count(domain, candidate) < ROTATE_AFTER_FAILURES {
74                return Some(candidate);
75            }
76        }
77
78        None
79    }
80
81    /// Choose the best browser for a domain (mirrors `hints.rs` `choose_browser_for_domain`).
82    ///
83    /// Returns the first browser in [`BROWSER_ROTATION`] that has not
84    /// exceeded the failure threshold. Falls back to `fallback` if every
85    /// browser is exhausted.
86    pub fn choose_browser<'a>(&self, domain: &str, fallback: &'a str) -> &'a str
87    where
88        'static: 'a,
89    {
90        for &browser in BROWSER_ROTATION {
91            if self.tracker.failure_count(domain, browser) < ROTATE_AFTER_FAILURES {
92                return browser;
93            }
94        }
95        fallback
96    }
97}
98
#[cfg(test)]
mod tests {
    use super::*;

    /// Domain used by the rotation tests.
    const DOMAIN: &str = "d.com";

    /// Build a selector with an empty failure history.
    fn make_selector() -> BrowserSelector {
        let tracker = FailureTracker::new();
        BrowserSelector::new(tracker)
    }

    /// Record two failures for `(domain, browser)`, which the threshold test
    /// below shows is enough to mark the browser as exhausted.
    fn fail_twice(sel: &BrowserSelector, domain: &str, browser: &str) {
        for _ in 0..2 {
            sel.failure_tracker().record_failure(domain, browser);
        }
    }

    /// Exhaust every browser in the rotation for `domain`.
    fn exhaust_all(sel: &BrowserSelector, domain: &str) {
        for &browser in BROWSER_ROTATION {
            fail_twice(sel, domain, browser);
        }
    }

    #[test]
    fn rotation_constants_are_consistent() {
        // The full rotation must be the primary list followed by the extended list.
        let expected: Vec<&str> = PRIMARY_ROTATION
            .iter()
            .chain(EXTENDED_ROTATION)
            .copied()
            .collect();
        assert_eq!(BROWSER_ROTATION, expected.as_slice());
    }

    #[test]
    fn should_rotate_after_threshold() {
        let sel = make_selector();
        let (domain, browser) = ("example.com", "chrome-h");

        // Zero failures, then one failure: still below the threshold.
        assert!(!sel.should_rotate(domain, browser));
        sel.failure_tracker().record_failure(domain, browser);
        assert!(!sel.should_rotate(domain, browser));

        // The second failure reaches the threshold.
        sel.failure_tracker().record_failure(domain, browser);
        assert!(sel.should_rotate(domain, browser));
    }

    #[test]
    fn next_browser_skips_exhausted() {
        let sel = make_selector();

        // With only chrome-h exhausted, its successor is chrome-new.
        fail_twice(&sel, DOMAIN, "chrome-h");
        assert_eq!(sel.next_browser(DOMAIN, "chrome-h"), Some("chrome-new"));

        // Once chrome-new is exhausted as well, it is skipped in favour of firefox.
        fail_twice(&sel, DOMAIN, "chrome-new");
        assert_eq!(sel.next_browser(DOMAIN, "chrome-h"), Some("firefox"));
    }

    #[test]
    fn next_browser_returns_none_when_all_exhausted() {
        let sel = make_selector();
        exhaust_all(&sel, DOMAIN);
        assert_eq!(sel.next_browser(DOMAIN, "chrome-h"), None);
    }

    #[test]
    fn choose_browser_picks_first_available() {
        let sel = make_selector();

        // Clean history: the head of the rotation wins.
        assert_eq!(sel.choose_browser(DOMAIN, "fallback"), "chrome-h");

        // After chrome-h is exhausted the second entry is chosen.
        fail_twice(&sel, DOMAIN, "chrome-h");
        assert_eq!(sel.choose_browser(DOMAIN, "fallback"), "chrome-new");
    }

    #[test]
    fn choose_browser_falls_back_when_all_exhausted() {
        let sel = make_selector();
        exhaust_all(&sel, DOMAIN);
        assert_eq!(sel.choose_browser(DOMAIN, "fallback"), "fallback");
    }

    #[test]
    fn next_browser_wraps_around() {
        let sel = make_selector();
        // From the tail of the rotation the search wraps back to the head.
        let last = BROWSER_ROTATION.last().copied().unwrap();
        assert_eq!(sel.next_browser(DOMAIN, last), Some("chrome-h"));
    }
}