Skip to main content

spider_browser/retry/
retry_engine.rs

1//! Smart retry engine with stealth-first escalation.
2//!
3//! **Strategy: escalate proxy quality (stealth) FAST, try alternative engines LAST.**
4//!
5//! - **Phase 1 (Stealth escalation)**: For each stealth level `[0 -> max]`,
6//!   try PRIMARY browsers `[chrome-h, chrome-new]` with transient retries.
7//!   If blocked -> skip remaining primary browsers, escalate stealth immediately.
8//!
//! - **Phase 2 (Extended rotation)**: Once Phase 1 stops (blocked, timed out,
//!   or otherwise failed) and retry budget remains, try EXTENDED browsers
//!   `[firefox, lightpanda, servo]` at max stealth for different engine
//!   fingerprints + best proxies.
12//!
13//! ```text
14//! Phase 1 -- stealth escalation across primary browsers:
15//!   for each stealth level (initial -> maxStealthLevel):
16//!     for each PRIMARY browser [chrome-h, chrome-new]:
17//!       attempt action
18//!       if transient   -> reconnect same browser, retry up to 2x
19//!       if blocked     -> skip remaining primary, escalate stealth immediately
20//!       if backend_down-> mark down, next browser
21//!       if auth        -> throw immediately
22//!       if rate_limit  -> wait, retry same browser
23//!
//! Phase 2 -- extended rotation at max stealth (after a block, a timeout, or any other Phase 1 failure, budget permitting):
25//!   for each EXTENDED browser [firefox, lightpanda, servo]:
26//!     single attempt (no transient retries)
27//! ```
28
29use std::collections::HashSet;
30use std::future::Future;
31use std::sync::atomic::{AtomicU32, Ordering};
32use std::sync::Arc;
33
34use serde_json::json;
35use tracing::{info, warn};
36use url::Url;
37
38use crate::errors::SpiderError;
39use crate::events::SpiderEventEmitter;
40use crate::protocol::protocol_adapter::{ProtocolAdapter, ProtocolAdapterOptions};
41use crate::protocol::transport::{Transport, TransportOptions};
42
43use super::browser_selector::{BrowserSelector, EXTENDED_ROTATION, PRIMARY_ROTATION};
44use super::failure_tracker::FailureTracker;
45use super::keyword_classifier::KeywordClassifier;
46
47// ---------------------------------------------------------------------------
48// Error classification
49// ---------------------------------------------------------------------------
50
/// Error classification for retry decisions.
///
/// Produced by `RetryEngine::classify_error` and consumed by the retry loop to
/// decide whether to retry, rotate browsers, escalate stealth, or give up.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorClass {
    /// Connection/session hiccup; eligible for a retry on the same browser.
    Transient,
    /// Anti-bot / challenge response; the browser is recorded as failed for the
    /// domain and the engine moves on to the next stealth level.
    Blocked,
    /// The browser backend is unavailable; it is skipped for the rest of the run.
    BackendDown,
    /// Authentication/authorization failure; returned to the caller immediately.
    Auth,
    /// Rate limited; the engine sleeps (honoring `retry_after_ms` when present)
    /// and retries the same browser.
    RateLimit,
}
60
61/// Build the error classifier (Aho-Corasick, O(n) single-pass).
62///
63/// Rules are priority-ordered -- first match wins.
64fn build_error_classifier() -> KeywordClassifier<ErrorClass> {
65    KeywordClassifier::new(&[
66        // Blocked -- checked first (most common heuristic case)
67        (
68            &[
69                "bot detect",
70                "are you a robot",
71                "bot or not",
72                "blocked",
73                "403",
74                "captcha",
75                "network security",
76                "human verification",
77                "verify you are human",
78                "show us your human side",
79                "can't tell if you're a human",
80                "checking your browser",
81                "bot protection",
82                "automated access",
83                "suspected automated",
84                "prove you're not a robot",
85                "pardon our interruption",
86                "powered and protected by",
87                "request could not be processed",
88                "access to this page has been denied",
89                "access denied",
90                "please complete the security check",
91                "enable cookies",
92                "browser check",
93                "just a moment",
94                "rate limit exceeded",
95                "too many requests",
96                "err_blocked_by_client",
97            ],
98            ErrorClass::Blocked,
99        ),
100        // Auth
101        (&["401", "402", "unauthorized"], ErrorClass::Auth),
102        // Backend down
103        (
104            &[
105                "backend unavailable",
106                "no backend",
107                "service unavailable",
108                "503",
109                "failed to create page target",
110                "unexpected server response",
111            ],
112            ErrorClass::BackendDown,
113        ),
114        // Transient (connection)
115        (
116            &[
117                "err_connection_reset",
118                "err_connection_closed",
119                "err_empty_response",
120                "err_ssl_protocol_error",
121                "err_ssl_version_or_cipher_mismatch",
122                "err_cert",
123                "timeout",
124            ],
125            ErrorClass::Transient,
126        ),
127        // Transient (WebSocket / session)
128        (
129            &[
130                "websocket is not connected",
131                "websocket closed",
132                "session with given id not found",
133                "content contamination",
134                "insufficient content",
135            ],
136            ErrorClass::Transient,
137        ),
138    ])
139}
140
141/// Build the disconnection classifier (determines whether to reconnect).
142fn build_disconnection_classifier() -> KeywordClassifier<bool> {
143    KeywordClassifier::new(&[
144        // NOT disconnections (page-level) -- checked first
145        (&["err_blocked_by_client"], false),
146        // Actual disconnections
147        (
148            &[
149                "websocket is not connected",
150                "websocket closed",
151                "session destroyed",
152                "session with given id not found",
153                "err_connection_reset",
154                "err_connection_closed",
155                "err_empty_response",
156                "socket hang up",
157                "err_aborted",
158                "content contamination",
159                "insufficient content",
160                "err_ssl_protocol_error",
161                "err_ssl_version_or_cipher_mismatch",
162            ],
163            true,
164        ),
165    ])
166}
167
168// ---------------------------------------------------------------------------
169// Configuration
170// ---------------------------------------------------------------------------
171
/// Options for constructing a [`RetryEngine`].
pub struct RetryOptions {
    /// Number of retries allowed after the first attempt; `execute` permits
    /// `max_retries + 1` attempts in total.
    pub max_retries: u32,
    /// Transport configuration; its `stealth_level` seeds the engine's starting
    /// stealth level.
    pub transport_opts: TransportOptions,
    /// Event sink for retry/stealth/browser-switch notifications; also handed to
    /// every adapter the engine creates.
    pub emitter: SpiderEventEmitter,
    /// Maximum stealth level to escalate to (1-3, default 3).
    pub max_stealth_level: Option<u32>,
    /// Timeout for retry attempts -- shorter than first try (default: 15 000 ms).
    pub retry_timeout_ms: Option<u64>,
    /// CDP/BiDi command timeout in ms, passed through to new adapters (default: 30 000).
    pub command_timeout_ms: Option<u64>,
}
184
/// Mutable context threaded through retry attempts.
///
/// The engine replaces `adapter` whenever it switches or reconnects a browser.
pub struct RetryContext {
    /// Shared transport; reconnected in place when the engine switches browsers.
    pub transport: Arc<Transport>,
    /// Protocol adapter currently bound to `transport`.
    pub adapter: ProtocolAdapter,
    /// URL to re-navigate to after a browser switch; also used to derive the
    /// domain for per-domain failure tracking. `None` skips both.
    pub current_url: Option<String>,
    /// Callback invoked with the new adapter right after the engine swaps it in.
    pub on_adapter_changed: Box<dyn Fn(&ProtocolAdapter) + Send + Sync>,
}
192
193// ---------------------------------------------------------------------------
194// Result of a single-browser try
195// ---------------------------------------------------------------------------
196
/// Outcome of driving the action against one browser (see `try_browser`).
struct TryResult<T> {
    /// `true` when the action succeeded; `value` is then `Some`.
    success: bool,
    /// The action's result on success, `None` otherwise.
    value: Option<T>,
    /// Running attempt counter after this try (includes attempts made by earlier tries).
    total_attempts: u32,
    /// `false` when the browser switch itself failed and the action never ran.
    tried_action: bool,
    /// The error that ended this try, if any.
    last_error: Option<SpiderError>,
}
204
205// ---------------------------------------------------------------------------
206// RetryEngine
207// ---------------------------------------------------------------------------
208
/// Smart retry engine with stealth-first escalation and browser rotation.
///
/// Counters are atomics; per-domain failure history lives in the
/// `FailureTracker` owned by the selector. The retry loop (`execute`) takes
/// `&mut self`, so a single owner drives it and `down_backends` can be a plain
/// `HashSet`.
// NOTE(review): earlier docs described the tracker as `DashMap`-backed and the
// engine as `!Sync`; neither is verifiable from this file -- confirm.
pub struct RetryEngine {
    /// Construction-time options (retry budget, emitter, transport options).
    opts: RetryOptions,
    /// Browser selector; provides access to the per-domain failure tracker.
    selector: BrowserSelector,
    /// Stealth level currently in effect; updated as the engine escalates.
    current_stealth_level: AtomicU32,
    /// Upper bound for stealth escalation (defaults to 3).
    max_stealth_level: u32,
    /// Timeout for retry attempts in ms (defaults to 15 000).
    retry_timeout_ms: u64,
    /// Command timeout in ms forwarded to newly created adapters (defaults to 30 000).
    command_timeout_ms: u64,
    /// Browser backends that returned 503/unavailable -- persists across stealth levels.
    down_backends: HashSet<String>,
    /// Count of timeout errors -- used for progressive timeout escalation.
    timeout_count: AtomicU32,
    /// Pre-built keyword classifiers, constructed once in `new`.
    error_classifier: KeywordClassifier<ErrorClass>,
    /// Pre-built classifier deciding whether an error means the session is dead.
    disconnection_classifier: KeywordClassifier<bool>,
}
229
230impl RetryEngine {
231    /// Create a new retry engine from the given options.
232    pub fn new(opts: RetryOptions) -> Self {
233        let stealth = opts.transport_opts.stealth_level;
234        let max_stealth = opts.max_stealth_level.unwrap_or(3);
235        let retry_timeout = opts.retry_timeout_ms.unwrap_or(15_000);
236        let cmd_timeout = opts.command_timeout_ms.unwrap_or(30_000);
237
238        Self {
239            selector: BrowserSelector::new(FailureTracker::new()),
240            current_stealth_level: AtomicU32::new(stealth),
241            max_stealth_level: max_stealth,
242            retry_timeout_ms: retry_timeout,
243            command_timeout_ms: cmd_timeout,
244            down_backends: HashSet::new(),
245            timeout_count: AtomicU32::new(0),
246            error_classifier: build_error_classifier(),
247            disconnection_classifier: build_disconnection_classifier(),
248            opts,
249        }
250    }
251
252    /// Current stealth level (0 = auto, 1-3 = explicit tiers).
253    pub fn stealth_level(&self) -> u32 {
254        self.current_stealth_level.load(Ordering::Relaxed)
255    }
256
257    /// Progressive page timeout: fail fast initially, escalate on retries.
258    ///
259    /// 1st attempt: 35 s, 2nd: 50 s, 3rd+: 65 s.
260    /// Must exceed server-side nav timeout (20 s) + retry (15 s).
261    fn progressive_timeout(&self) -> u64 {
262        match self.timeout_count.load(Ordering::Relaxed) {
263            0 => 35_000,
264            1 => 50_000,
265            _ => 65_000,
266        }
267    }
268
269    /// Execute an action with stealth-first retry across browsers and stealth levels.
270    pub async fn execute<T, F, Fut>(
271        &mut self,
272        mut make_future: F,
273        ctx: &mut RetryContext,
274    ) -> Result<T, SpiderError>
275    where
276        F: FnMut() -> Fut,
277        Fut: Future<Output = Result<T, SpiderError>>,
278    {
279        let mut last_error: Option<SpiderError> = None;
280        let mut total_attempts: u32 = 0;
281        let budget = self.opts.max_retries + 1; // total attempts allowed
282        self.down_backends.clear();
283
284        let stealth_levels = self.get_stealth_progression();
285        let initial_browser = ctx.transport.browser();
286        let mut consecutive_disconnects: u32 = 0;
287        let mut was_blocked = false;
288        let mut had_timeout = false;
289        let mut phase1_timeouts: u32 = 0;
290
291        // ---- Phase 1: Stealth escalation across primary browsers ----
292        for si in 0..stealth_levels.len() {
293            if total_attempts >= budget {
294                break;
295            }
296            // 1+ timeout on Chrome -> different Chrome variant won't help.
297            // Jump to Phase 2 (Firefox).
298            if phase1_timeouts >= 1 {
299                had_timeout = true;
300                break;
301            }
302
303            let stealth = stealth_levels[si];
304
305            // Stealth escalation (skip for first level)
306            if si > 0 {
307                let prev = stealth_levels[si - 1];
308                self.current_stealth_level.store(stealth, Ordering::Relaxed);
309                ctx.transport.set_stealth_level(stealth);
310
311                info!("retry: escalating stealth {} -> {}", prev, stealth);
312                self.opts.emitter.emit(
313                    "stealth.escalated",
314                    json!({
315                        "from": prev,
316                        "to": stealth,
317                        "reason": last_error.as_ref().map(|e| format!("{:?}", self.classify_error(e))).unwrap_or_else(|| "exhausted".into()),
318                    }),
319                );
320
321                // Clear domain failure tracking so all browsers are available again
322                if let Some(domain) = Self::extract_domain(ctx.current_url.as_deref()) {
323                    self.selector.failure_tracker().clear(&domain);
324                }
325            }
326
327            let primary_browsers: Vec<&str> = if si == 0 {
328                Self::ordered_primary_browsers(&initial_browser)
329            } else {
330                PRIMARY_ROTATION.to_vec()
331            };
332
333            let mut tried_any = false;
334
335            for &browser in &primary_browsers {
336                if total_attempts >= budget {
337                    break;
338                }
339                if self.down_backends.contains(browser) {
340                    continue;
341                }
342                // 6+ consecutive WS disconnects -> server overloaded
343                if consecutive_disconnects >= 6 {
344                    warn!("retry: 6+ consecutive disconnects, server overloaded -- aborting");
345                    break;
346                }
347
348                let result = self
349                    .try_browser(&mut make_future, ctx, browser, stealth, total_attempts, budget, true)
350                    .await;
351
352                total_attempts = result.total_attempts;
353                if result.success {
354                    return Ok(result.value.unwrap());
355                }
356                if result.tried_action {
357                    tried_any = true;
358                }
359                if let Some(ref err) = result.last_error {
360                    let error_class = self.classify_error(err);
361                    was_blocked = error_class == ErrorClass::Blocked;
362                    if error_class == ErrorClass::Auth {
363                        return Err(result.last_error.unwrap());
364                    }
365                    // Track consecutive disconnects
366                    if self.is_disconnection_error(err) {
367                        consecutive_disconnects += 1;
368                    } else {
369                        consecutive_disconnects = 0;
370                    }
371                    // Timeout -> track count, break to Phase 2
372                    if matches!(err, SpiderError::Timeout(_)) {
373                        phase1_timeouts += 1;
374                        self.timeout_count.fetch_add(1, Ordering::Relaxed);
375                        had_timeout = true;
376                        break;
377                    }
378                    // Blocked -> skip remaining primary, escalate stealth
379                    if was_blocked {
380                        break;
381                    }
382                }
383                last_error = result.last_error.or(last_error);
384            }
385
386            if !tried_any {
387                warn!("retry: all browser backends unavailable, stopping");
388                break;
389            }
390        }
391
392        // ---- Phase 2: Extended rotation at max stealth ----
393        if (was_blocked || had_timeout || total_attempts > 0)
394            && total_attempts < budget
395            && !EXTENDED_ROTATION.is_empty()
396        {
397            for &browser in EXTENDED_ROTATION {
398                if total_attempts >= budget {
399                    break;
400                }
401                if self.down_backends.contains(browser) {
402                    continue;
403                }
404
405                let max_stealth =
406                    stealth_levels.last().copied().unwrap_or(self.max_stealth_level);
407                let result = self
408                    .try_browser(&mut make_future, ctx, browser, max_stealth, total_attempts, budget, false)
409                    .await;
410
411                total_attempts = result.total_attempts;
412                if result.success {
413                    return Ok(result.value.unwrap());
414                }
415                if let Some(ref err) = result.last_error {
416                    if self.classify_error(err) == ErrorClass::Auth {
417                        return Err(result.last_error.unwrap());
418                    }
419                }
420                last_error = result.last_error.or(last_error);
421            }
422        }
423
424        Err(last_error
425            .unwrap_or_else(|| SpiderError::Other("All browsers and stealth levels exhausted".into())))
426    }
427
428    /// Attempt an action on a specific browser, with optional transient retries.
429    async fn try_browser<T, F, Fut>(
430        &mut self,
431        make_future: &mut F,
432        ctx: &mut RetryContext,
433        browser: &str,
434        stealth: u32,
435        mut total_attempts: u32,
436        budget: u32,
437        allow_transient_retries: bool,
438    ) -> TryResult<T>
439    where
440        F: FnMut() -> Fut,
441        Fut: Future<Output = Result<T, SpiderError>>,
442    {
443        let last_error: Option<SpiderError> = None;
444
445        // Switch browser (skip on very first attempt -- already connected)
446        if total_attempts > 0 {
447            let prev_browser = ctx.transport.browser();
448            info!(
449                "retry: switching {} -> {} (stealth={})",
450                prev_browser, browser, stealth
451            );
452            self.opts.emitter.emit(
453                "browser.switching",
454                json!({
455                    "from": prev_browser,
456                    "to": browser,
457                    "reason": last_error.as_ref().map(|e| format!("{:?}", self.classify_error(e))).unwrap_or_else(|| "rotation".into()),
458                }),
459            );
460
461            match self.switch_browser(ctx, browser).await {
462                Ok(()) => {
463                    self.opts
464                        .emitter
465                        .emit("browser.switched", json!({ "browser": browser }));
466                }
467                Err(switch_err) => {
468                    warn!(
469                        "retry: switch to {} failed, skipping: {}",
470                        browser, switch_err
471                    );
472                    if matches!(switch_err, SpiderError::BackendUnavailable(_)) {
473                        self.down_backends.insert(browser.to_string());
474                    }
475                    return TryResult {
476                        success: false,
477                        value: None,
478                        total_attempts,
479                        tried_action: false,
480                        last_error: Some(switch_err),
481                    };
482                }
483            }
484        }
485
486        let max_transient_retries: u32 = if allow_transient_retries { 2 } else { 0 };
487        let max_disconnect_retries: u32 = if allow_transient_retries { 2 } else { 0 };
488        let mut transient_retries: u32 = 0;
489        let mut disconnect_retries: u32 = 0;
490
491        while total_attempts < budget {
492            total_attempts += 1;
493
494            match make_future().await {
495                Ok(value) => {
496                    if let Some(domain) = Self::extract_domain(ctx.current_url.as_deref()) {
497                        self.selector
498                            .failure_tracker()
499                            .record_success(&domain, browser);
500                    }
501                    return TryResult {
502                        success: true,
503                        value: Some(value),
504                        total_attempts,
505                        tried_action: true,
506                        last_error: None,
507                    };
508                }
509                Err(err) => {
510                    let error_class = self.classify_error(&err);
511
512                    warn!(
513                        "retry: attempt {}/{} failed: {} (class={:?}, browser={}, stealth={})",
514                        total_attempts, budget, err, error_class, browser, stealth
515                    );
516
517                    self.opts.emitter.emit(
518                        "retry.attempt",
519                        json!({
520                            "attempt": total_attempts,
521                            "maxRetries": self.opts.max_retries,
522                            "error": err.to_string(),
523                        }),
524                    );
525
526                    // Auth -> bubble up immediately
527                    if error_class == ErrorClass::Auth {
528                        return TryResult {
529                            success: false,
530                            value: None,
531                            total_attempts,
532                            tried_action: true,
533                            last_error: Some(err),
534                        };
535                    }
536
537                    // Rate limit -> exponential backoff with jitter
538                    if error_class == ErrorClass::RateLimit {
539                        transient_retries += 1;
540                        if transient_retries >= 2 {
541                            if let Some(domain) = Self::extract_domain(ctx.current_url.as_deref())
542                            {
543                                self.selector
544                                    .failure_tracker()
545                                    .record_failure(&domain, browser);
546                            }
547                            return TryResult {
548                                success: false,
549                                value: None,
550                                total_attempts,
551                                tried_action: true,
552                                last_error: Some(err),
553                            };
554                        }
555                        let base_ms = match &err {
556                            SpiderError::RateLimit {
557                                retry_after_ms: Some(ms),
558                                ..
559                            } => *ms,
560                            _ => 2000 * transient_retries as u64,
561                        };
562                        let jitter = (base_ms / 4).min(1000); // deterministic jitter approximation
563                        tokio::time::sleep(tokio::time::Duration::from_millis(base_ms + jitter))
564                            .await;
565                        continue;
566                    }
567
568                    // Backend down -> mark and move on
569                    if error_class == ErrorClass::BackendDown {
570                        self.down_backends.insert(browser.to_string());
571                        return TryResult {
572                            success: false,
573                            value: None,
574                            total_attempts,
575                            tried_action: true,
576                            last_error: Some(err),
577                        };
578                    }
579
580                    // Blocked -> record failure, move to next browser
581                    if error_class == ErrorClass::Blocked {
582                        if let Some(domain) = Self::extract_domain(ctx.current_url.as_deref()) {
583                            self.selector
584                                .failure_tracker()
585                                .record_failure(&domain, browser);
586                        }
587                        return TryResult {
588                            success: false,
589                            value: None,
590                            total_attempts,
591                            tried_action: true,
592                            last_error: Some(err),
593                        };
594                    }
595
596                    // Timeout -> rotate immediately
597                    if matches!(&err, SpiderError::Timeout(_)) {
598                        if let Some(domain) = Self::extract_domain(ctx.current_url.as_deref()) {
599                            self.selector
600                                .failure_tracker()
601                                .record_failure(&domain, browser);
602                        }
603                        return TryResult {
604                            success: false,
605                            value: None,
606                            total_attempts,
607                            tried_action: true,
608                            last_error: Some(err),
609                        };
610                    }
611
612                    // WS disconnection -> reconnect with backoff
613                    if error_class == ErrorClass::Transient && self.is_disconnection_error(&err) {
614                        if disconnect_retries < max_disconnect_retries {
615                            disconnect_retries += 1;
616                            let backoff_ms = if disconnect_retries == 1 { 1000 } else { 3000 };
617                            tokio::time::sleep(tokio::time::Duration::from_millis(backoff_ms))
618                                .await;
619
620                            // Second disconnect -> try a different engine
621                            let reconnect_browser = if disconnect_retries >= 2 {
622                                self.pick_alternate_engine(browser)
623                                    .unwrap_or_else(|| browser.to_string())
624                            } else {
625                                browser.to_string()
626                            };
627
628                            if self.switch_browser(ctx, &reconnect_browser).await.is_err() {
629                                if let Some(domain) =
630                                    Self::extract_domain(ctx.current_url.as_deref())
631                                {
632                                    self.selector
633                                        .failure_tracker()
634                                        .record_failure(&domain, browser);
635                                }
636                                return TryResult {
637                                    success: false,
638                                    value: None,
639                                    total_attempts,
640                                    tried_action: true,
641                                    last_error: Some(err),
642                                };
643                            }
644                            continue; // retry the action
645                        }
646                        if let Some(domain) = Self::extract_domain(ctx.current_url.as_deref()) {
647                            self.selector
648                                .failure_tracker()
649                                .record_failure(&domain, browser);
650                        }
651                        return TryResult {
652                            success: false,
653                            value: None,
654                            total_attempts,
655                            tried_action: true,
656                            last_error: Some(err),
657                        };
658                    }
659
660                    // Non-disconnect transient -> retry same browser
661                    if error_class == ErrorClass::Transient
662                        && transient_retries < max_transient_retries
663                    {
664                        transient_retries += 1;
665                        tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
666                        continue;
667                    }
668
669                    // Transient retry exhausted -> move to next browser
670                    if let Some(domain) = Self::extract_domain(ctx.current_url.as_deref()) {
671                        self.selector
672                            .failure_tracker()
673                            .record_failure(&domain, browser);
674                    }
675                    return TryResult {
676                        success: false,
677                        value: None,
678                        total_attempts,
679                        tried_action: true,
680                        last_error: Some(err),
681                    };
682                }
683            }
684        }
685
686        TryResult {
687            success: false,
688            value: None,
689            total_attempts,
690            tried_action: true,
691            last_error,
692        }
693    }
694
695    // ------------------------------------------------------------------
696    // Error classification
697    // ------------------------------------------------------------------
698
699    /// Classify an error to determine retry strategy.
700    ///
701    /// Fast path: typed error variants (`match`, no string scan).
702    /// Slow path: Aho-Corasick O(n) single-pass keyword matching.
703    fn classify_error(&self, err: &SpiderError) -> ErrorClass {
704        match err {
705            SpiderError::Auth(_) => ErrorClass::Auth,
706            SpiderError::RateLimit { .. } => ErrorClass::RateLimit,
707            SpiderError::Blocked(_) => ErrorClass::Blocked,
708            SpiderError::BackendUnavailable(_) => ErrorClass::BackendDown,
709            SpiderError::Timeout(_) => ErrorClass::Transient,
710            SpiderError::Connection { ws_code, message, .. } => {
711                if let Some(code) = ws_code {
712                    match *code {
713                        1006 | 1011 => return ErrorClass::Transient,
714                        4001 | 4002 => return ErrorClass::Auth,
715                        _ => {}
716                    }
717                }
718                // Fall through to keyword scan on the message
719                self.error_classifier
720                    .classify(message)
721                    .copied()
722                    .unwrap_or(ErrorClass::Transient)
723            }
724            SpiderError::Navigation(msg) => {
725                let cls = self.error_classifier.classify(msg).copied();
726                if cls == Some(ErrorClass::Blocked) {
727                    ErrorClass::Blocked
728                } else {
729                    ErrorClass::Transient
730                }
731            }
732            _ => {
733                let msg = err.to_string();
734                let cls = self.error_classifier.classify(&msg).copied();
735                if let Some(c) = cls {
736                    return c;
737                }
738                // Transport-level 429 fallback
739                if msg.contains("429") {
740                    return ErrorClass::RateLimit;
741                }
742                ErrorClass::Transient
743            }
744        }
745    }
746
747    /// Check if an error indicates the WebSocket/session is dead.
748    fn is_disconnection_error(&self, err: &SpiderError) -> bool {
749        let msg = err.to_string();
750        match err {
751            // NavigationError: page-level errors (false) vs actual disconnections (true).
752            // `None` (no keyword match) -> treat as disconnection for NavigationError.
753            SpiderError::Navigation(_) => {
754                self.disconnection_classifier.classify(&msg) != Some(&false)
755            }
756            _ => self.disconnection_classifier.classify(&msg) == Some(&true),
757        }
758    }
759
760    // ------------------------------------------------------------------
761    // Browser switching
762    // ------------------------------------------------------------------
763
764    /// Reconnect with a (possibly different) browser, re-navigate to the same URL.
765    async fn switch_browser(
766        &self,
767        ctx: &mut RetryContext,
768        new_browser: &str,
769    ) -> Result<(), SpiderError> {
770        ctx.adapter.destroy();
771
772        ctx.transport.reconnect(new_browser).await?;
773
774        let adapter_opts = if self.command_timeout_ms != 30_000 {
775            Some(ProtocolAdapterOptions {
776                command_timeout_ms: Some(self.command_timeout_ms),
777            })
778        } else {
779            None
780        };
781
782        // We need a sender that relays to the transport. Create a relay channel.
783        let (proto_tx, mut proto_rx) = tokio::sync::mpsc::unbounded_channel::<String>();
784        let transport_clone = ctx.transport.clone();
785        tokio::spawn(async move {
786            while let Some(data) = proto_rx.recv().await {
787                let _ = transport_clone.send(data);
788            }
789        });
790
791        let mut new_adapter =
792            ProtocolAdapter::new(proto_tx, self.opts.emitter.clone(), new_browser, adapter_opts);
793        new_adapter.init().await?;
794
795        ctx.adapter = new_adapter;
796        (ctx.on_adapter_changed)(&ctx.adapter);
797
798        if let Some(ref url) = ctx.current_url {
799            ctx.adapter.navigate(url).await?;
800            tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
801        }
802
803        Ok(())
804    }
805
806    // ------------------------------------------------------------------
807    // Helpers
808    // ------------------------------------------------------------------
809
810    /// Stealth progression: from current level up to max.
811    ///
812    /// e.g. start=0, max=3 -> `[0, 1, 2, 3]`
813    fn get_stealth_progression(&self) -> Vec<u32> {
814        let start = self.current_stealth_level.load(Ordering::Relaxed);
815        let mut levels = vec![start];
816        let mut next = if start < 1 { 1 } else { start + 1 };
817        while next <= self.max_stealth_level {
818            levels.push(next);
819            next += 1;
820        }
821        levels
822    }
823
824    /// Order PRIMARY browsers starting from `start`, then the rest.
825    fn ordered_primary_browsers(start: &str) -> Vec<&'static str> {
826        let idx = PRIMARY_ROTATION.iter().position(|&b| b == start);
827        match idx {
828            Some(i) if i > 0 => {
829                let mut v = PRIMARY_ROTATION[i..].to_vec();
830                v.extend_from_slice(&PRIMARY_ROTATION[..i]);
831                v
832            }
833            _ => PRIMARY_ROTATION.to_vec(),
834        }
835    }
836
837    /// Pick a non-Chrome engine for disconnect recovery.
838    fn pick_alternate_engine(&self, current: &str) -> Option<String> {
839        for &browser in EXTENDED_ROTATION {
840            if browser != current && !self.down_backends.contains(browser) {
841                return Some(browser.to_string());
842            }
843        }
844        for &browser in PRIMARY_ROTATION {
845            if browser != current && !self.down_backends.contains(browser) {
846                return Some(browser.to_string());
847            }
848        }
849        None
850    }
851
852    /// Extract the hostname from a URL string.
853    fn extract_domain(url: Option<&str>) -> Option<String> {
854        url.and_then(|u| Url::parse(u).ok())
855            .and_then(|u| u.host_str().map(|h| h.to_string()))
856    }
857}
858
#[cfg(test)]
mod tests {
    use super::*;

    // Block / anti-bot signals classify as `Blocked`.
    #[test]
    fn error_classifier_blocked() {
        let classifier = build_error_classifier();
        for message in [
            "Error 403 Forbidden",
            "CAPTCHA detected",
            "bot detection active",
            "Access Denied",
        ] {
            assert_eq!(classifier.classify(message), Some(&ErrorClass::Blocked));
        }
    }

    // An HTTP 401 message classifies as `Auth`.
    #[test]
    fn error_classifier_auth() {
        let classifier = build_error_classifier();
        let verdict = classifier.classify("HTTP 401 Unauthorized");
        assert_eq!(verdict, Some(&ErrorClass::Auth));
    }

    // Unavailable-backend messages classify as `BackendDown`.
    #[test]
    fn error_classifier_backend_down() {
        let classifier = build_error_classifier();
        for message in ["503 Service Unavailable", "backend unavailable for chrome"] {
            assert_eq!(
                classifier.classify(message),
                Some(&ErrorClass::BackendDown)
            );
        }
    }

    // Connection resets and unexpected socket closes classify as `Transient`.
    #[test]
    fn error_classifier_transient() {
        let classifier = build_error_classifier();
        for message in ["net::ERR_CONNECTION_RESET", "WebSocket closed unexpectedly"] {
            assert_eq!(classifier.classify(message), Some(&ErrorClass::Transient));
        }
    }

    // Text without any known keyword yields no classification.
    #[test]
    fn error_classifier_no_match() {
        let classifier = build_error_classifier();
        assert!(classifier.classify("page loaded fine").is_none());
    }

    // Socket-level failures are flagged as disconnections.
    #[test]
    fn disconnection_classifier_true() {
        let classifier = build_disconnection_classifier();
        for message in [
            "websocket is not connected",
            "socket hang up",
            "err_aborted during navigation",
        ] {
            assert_eq!(classifier.classify(message), Some(&true));
        }
    }

    // Page-level network errors are explicitly *not* disconnections.
    #[test]
    fn disconnection_classifier_false_for_page_level() {
        let classifier = build_disconnection_classifier();
        let verdict = classifier.classify("net::ERR_BLOCKED_BY_CLIENT");
        assert_eq!(verdict, Some(&false));
    }

    // Mirrors the level sequence of `get_stealth_progression` on plain
    // integers, so no engine instance is needed.
    #[test]
    fn stealth_progression() {
        let cases = [
            (0u32, 3u32, vec![0u32, 1, 2, 3]),
            (2, 3, vec![2, 3]),
            (3, 3, vec![3]),
            (0, 1, vec![0, 1]),
        ];
        for (start, max, expected) in cases {
            let levels: Vec<u32> = std::iter::once(start).chain((start + 1)..=max).collect();
            assert_eq!(levels, expected, "start={start}, max={max}");
        }
    }

    // The requested primary browser comes first; the rest follow in rotation order.
    #[test]
    fn ordered_primary_browsers_from_start() {
        for (start, expected) in [
            ("chrome-h", ["chrome-h", "chrome-new"]),
            ("chrome-new", ["chrome-new", "chrome-h"]),
            // Unknown browser -> default order
            ("firefox", ["chrome-h", "chrome-new"]),
        ] {
            assert_eq!(RetryEngine::ordered_primary_browsers(start), expected);
        }
    }

    // Host extraction succeeds for a valid URL and yields `None` otherwise.
    #[test]
    fn extract_domain_works() {
        let host = RetryEngine::extract_domain(Some("https://example.com/path"));
        assert_eq!(host.as_deref(), Some("example.com"));

        assert!(RetryEngine::extract_domain(None).is_none());
        assert!(RetryEngine::extract_domain(Some("not-a-url")).is_none());
    }
}