halldyll-core 0.1.0

Core scraping engine for Halldyll - high-performance async web scraper for AI agents
Documentation
//! Request - HTTP request building

use reqwest::header::{HeaderMap, HeaderName, HeaderValue, ACCEPT, ACCEPT_ENCODING, ACCEPT_LANGUAGE, IF_MODIFIED_SINCE, IF_NONE_MATCH};
use url::Url;

use crate::types::Config;

/// HTTP request builder.
///
/// Collects the target URL together with the headers that should accompany
/// the request. Values are accumulated through the chaining methods on this
/// type and handed back as a `(Url, HeaderMap)` pair by `build`.
#[derive(Debug, Clone)]
pub struct RequestBuilder {
    /// Target URL of the request.
    url: Url,
    /// Headers accumulated so far.
    headers: HeaderMap,
    /// ETag validator passed to `if_none_match`, if any.
    etag: Option<String>,
    /// Date validator passed to `if_modified_since`, if any.
    last_modified: Option<String>,
}

impl RequestBuilder {
    /// New request for a URL
    pub fn new(url: Url) -> Self {
        Self {
            url,
            headers: HeaderMap::new(),
            etag: None,
            last_modified: None,
        }
    }

    /// Apply default configuration
    pub fn with_config(mut self, config: &Config) -> Self {
        // Accept headers
        if let Ok(v) = HeaderValue::from_str(&config.fetch.accept_headers.accept) {
            self.headers.insert(ACCEPT, v);
        }
        if let Ok(v) = HeaderValue::from_str(&config.fetch.accept_headers.accept_language) {
            self.headers.insert(ACCEPT_LANGUAGE, v);
        }
        if let Ok(v) = HeaderValue::from_str(&config.fetch.accept_headers.accept_encoding) {
            self.headers.insert(ACCEPT_ENCODING, v);
        }
        self
    }

    /// Add a custom header
    pub fn header(mut self, name: &str, value: &str) -> Self {
        if let (Ok(n), Ok(v)) = (
            HeaderName::try_from(name),
            HeaderValue::from_str(value),
        ) {
            self.headers.insert(n, v);
        }
        self
    }

    /// Configure conditional request with ETag
    pub fn if_none_match(mut self, etag: &str) -> Self {
        self.etag = Some(etag.to_string());
        if let Ok(v) = HeaderValue::from_str(etag) {
            self.headers.insert(IF_NONE_MATCH, v);
        }
        self
    }

    /// Configure conditional request with Last-Modified
    pub fn if_modified_since(mut self, date: &str) -> Self {
        self.last_modified = Some(date.to_string());
        if let Ok(v) = HeaderValue::from_str(date) {
            self.headers.insert(IF_MODIFIED_SINCE, v);
        }
        self
    }

    /// Returns the URL
    pub fn url(&self) -> &Url {
        &self.url
    }

    /// Returns the headers
    pub fn headers(&self) -> &HeaderMap {
        &self.headers
    }

    /// Consume and return components
    pub fn build(self) -> (Url, HeaderMap) {
        (self.url, self.headers)
    }
}