halldyll-core 0.1.0

Core scraping engine for Halldyll - high-performance async web scraper for AI agents
Documentation
//! HTTP Client - Configuration and connection pooling

use reqwest::{Client, ClientBuilder};
use std::sync::Arc;
use std::time::Duration;

use crate::types::{Config, Error};
use crate::types::error::Result;

/// Thread-safe shareable HTTP client
///
/// Both fields are held behind [`Arc`], so the derived `Clone` only copies
/// the two reference-counted handles: every clone shares the same `reqwest`
/// client and the same [`Config`] value.
#[derive(Clone)]
pub struct HttpClient {
    /// Shared `reqwest` client used to issue requests.
    inner: Arc<Client>,
    /// Shared configuration stored next to the client.
    config: Arc<Config>,
}

impl HttpClient {
    /// Creates a new HTTP client with the given configuration
    pub fn new(config: Config) -> Result<Self> {
        let client = Self::build_client(&config)?;
        Ok(Self {
            inner: Arc::new(client),
            config: Arc::new(config),
        })
    }

    /// Builds the reqwest client
    fn build_client(config: &Config) -> Result<Client> {
        let mut builder = ClientBuilder::new()
            .user_agent(&config.fetch.user_agent)
            .timeout(Duration::from_millis(config.fetch.total_timeout_ms))
            .connect_timeout(Duration::from_millis(config.fetch.connect_timeout_ms))
            .pool_max_idle_per_host(10)
            .pool_idle_timeout(Duration::from_secs(90))
            .tcp_nodelay(true)
            .tcp_keepalive(Duration::from_secs(60));

        // Redirects
        if config.fetch.follow_redirects {
            builder = builder.redirect(reqwest::redirect::Policy::limited(
                config.fetch.max_redirects as usize,
            ));
        } else {
            builder = builder.redirect(reqwest::redirect::Policy::none());
        }

        // Decompression
        if config.fetch.enable_decompression {
            builder = builder
                .gzip(true)
                .brotli(true)
                .deflate(true);
        } else {
            builder = builder
                .gzip(false)
                .brotli(false)
                .deflate(false);
        }

        builder.build().map_err(Error::Network)
    }

    /// Access to the inner client
    pub fn client(&self) -> &Client {
        &self.inner
    }

    /// Access to the configuration
    pub fn config(&self) -> &Config {
        &self.config
    }

    /// Configured User-Agent
    pub fn user_agent(&self) -> &str {
        &self.config.fetch.user_agent
    }
}