//! Retry - retry strategy with exponential backoff and jitter.
//!
//! Part of `halldyll-core`, the core scraping engine for Halldyll, a
//! high-performance async web scraper for AI agents.

use rand::Rng;
use std::time::Duration;

use crate::types::error::Error;

/// Retry policy controlling when and how failed requests are retried
#[derive(Debug, Clone)]
pub struct RetryPolicy {
    /// Maximum number of retries (beyond the initial attempt)
    pub max_retries: u32,
    /// Initial delay (ms)
    pub initial_delay_ms: u64,
    /// Backoff multiplier
    pub multiplier: f64,
    /// Max jitter (ms)
    pub max_jitter_ms: u64,
    /// Max delay (ms)
    pub max_delay_ms: u64,
    /// Retry on timeout errors
    pub retry_on_timeout: bool,
    /// Retry on connection errors
    pub retry_on_connection_error: bool,
    /// Retry on 5xx server errors
    pub retry_on_5xx: bool,
    /// Retry on 429 rate limit errors
    pub retry_on_429: bool,
}

impl Default for RetryPolicy {
    fn default() -> Self {
        Self {
            max_retries: 3,
            initial_delay_ms: 1000,
            multiplier: 2.0,
            max_jitter_ms: 500,
            max_delay_ms: 30000,
            retry_on_timeout: true,
            retry_on_connection_error: true,
            retry_on_5xx: true,
            retry_on_429: true,
        }
    }
}
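
// A hedged sketch of tuning the policy with struct-update syntax; the field
// values here are illustrative, not recommendations from the crate.
#[allow(dead_code)]
fn aggressive_policy_sketch() -> RetryPolicy {
    RetryPolicy {
        max_retries: 5,        // retry harder than the default 3
        initial_delay_ms: 250, // start backing off sooner
        ..RetryPolicy::default()
    }
}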

impl RetryPolicy {
    /// Calculates the backoff delay for a given attempt (zero-based), with jitter.
    ///
    /// The base delay grows as `initial_delay_ms * multiplier^attempt`, is capped
    /// at `max_delay_ms`, and then up to `max_jitter_ms` of random jitter is
    /// added on top, so the final delay can slightly exceed the cap.
    pub fn delay_for_attempt(&self, attempt: u32) -> Duration {
        let base_delay = self.initial_delay_ms as f64 * self.multiplier.powi(attempt as i32);
        let capped_delay = base_delay.min(self.max_delay_ms as f64);

        // Random jitter de-synchronizes concurrent clients retrying at once
        let jitter = if self.max_jitter_ms > 0 {
            let mut rng = rand::thread_rng();
            rng.gen_range(0..=self.max_jitter_ms)
        } else {
            0
        };

        Duration::from_millis(capped_delay as u64 + jitter)
    }

    /// Should we retry this error? Always `false` once `attempt` reaches `max_retries`.
    pub fn should_retry(&self, error: &Error, attempt: u32) -> bool {
        if attempt >= self.max_retries {
            return false;
        }

        match error {
            Error::Timeout(_) => self.retry_on_timeout,
            Error::Network(e) => {
                if e.is_timeout() {
                    self.retry_on_timeout
                } else if e.is_connect() {
                    self.retry_on_connection_error
                } else if let Some(status) = e.status() {
                    if status.as_u16() == 429 {
                        self.retry_on_429
                    } else if status.is_server_error() {
                        self.retry_on_5xx
                    } else {
                        false
                    }
                } else {
                    // No status and neither a timeout nor a connect failure:
                    // treat other transport-level errors like connection errors
                    self.retry_on_connection_error
                }
            }
            Error::RateLimited(_) => self.retry_on_429,
            _ => false,
        }
    }

    /// Should we retry this HTTP status code?
    pub fn should_retry_status(&self, status_code: u16, attempt: u32) -> bool {
        if attempt >= self.max_retries {
            return false;
        }

        match status_code {
            429 => self.retry_on_429,
            500..=599 => self.retry_on_5xx,
            _ => false,
        }
    }
}
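
// A minimal sketch of the policy's arithmetic, with jitter disabled so the
// delays are deterministic; these tests are illustrative, not part of the
// crate's shipped suite.
#[cfg(test)]
mod policy_sketch_tests {
    use super::*;

    #[test]
    fn backoff_doubles_then_saturates() {
        let policy = RetryPolicy {
            max_jitter_ms: 0, // no jitter: delays are exactly the capped base
            ..RetryPolicy::default()
        };
        assert_eq!(policy.delay_for_attempt(0), Duration::from_millis(1_000));
        assert_eq!(policy.delay_for_attempt(1), Duration::from_millis(2_000));
        assert_eq!(policy.delay_for_attempt(2), Duration::from_millis(4_000));
        // 1000 * 2^10 ms would be ~17 minutes; the cap holds it at 30 s.
        assert_eq!(policy.delay_for_attempt(10), Duration::from_millis(30_000));
    }

    #[test]
    fn retries_429_and_5xx_but_not_other_statuses() {
        let policy = RetryPolicy::default();
        assert!(policy.should_retry_status(429, 0));
        assert!(policy.should_retry_status(503, 0));
        assert!(!policy.should_retry_status(404, 0));
        // Once `attempt` reaches `max_retries`, nothing is retried.
        assert!(!policy.should_retry_status(503, 3));
    }
}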

/// Retry state: a policy plus a count of the attempts made so far
#[derive(Debug)]
pub struct RetryState {
    policy: RetryPolicy,
    attempts: u32,
}

impl RetryState {
    /// Creates a new retry state with zero attempts
    pub fn new(policy: RetryPolicy) -> Self {
        Self {
            policy,
            attempts: 0,
        }
    }

    /// Increments the attempts counter
    pub fn increment(&mut self) {
        self.attempts += 1;
    }

    /// Number of attempts made so far
    pub fn attempts(&self) -> u32 {
        self.attempts
    }

    /// Delay before next attempt
    pub fn next_delay(&self) -> Duration {
        self.policy.delay_for_attempt(self.attempts)
    }

    /// Should we retry this error, given the attempts made so far?
    pub fn should_retry(&self, error: &Error) -> bool {
        self.policy.should_retry(error, self.attempts)
    }

    /// Returns the policy
    pub fn policy(&self) -> &RetryPolicy {
        &self.policy
    }
}
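
// A minimal usage sketch of the retry loop these types are built for. The
// `fetch` closure stands in for the crate's real request path, and blocking
// `std::thread::sleep` keeps the sketch dependency-free; the real crate is
// async and would await a timer instead.
#[allow(dead_code)]
fn retry_loop_sketch<T>(
    policy: RetryPolicy,
    mut fetch: impl FnMut() -> Result<T, Error>,
) -> Result<T, Error> {
    let mut state = RetryState::new(policy);
    loop {
        match fetch() {
            Ok(value) => return Ok(value),
            // Retryable failure with budget left: back off, count it, try again.
            Err(err) if state.should_retry(&err) => {
                std::thread::sleep(state.next_delay());
                state.increment();
            }
            // Non-retryable error or retries exhausted: surface the error.
            Err(err) => return Err(err),
        }
    }
}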