Skip to main content

spider_lib/
downloader.rs

//! Traits for defining and implementing HTTP downloaders in `spider-lib`.
//!
//! This module provides the foundational traits for handling HTTP requests and
//! responses within the web crawling framework.
//!
//! - The `Downloader` trait defines the interface for components responsible
//!   for executing web requests and producing `Response` objects. Implementations
//!   of this trait typically wrap HTTP client libraries (e.g., `reqwest`).
//! - The `SimpleHttpClient` trait offers a basic, generic interface for
//!   performing simple GET requests, primarily used for internal utility
//!   functions or when a full `Request` object is not necessary.
12
13use async_trait::async_trait;
14use bytes::Bytes;
15use http::StatusCode;
16use std::time::Duration;
17
18use crate::error::SpiderError;
19use crate::request::Request;
20use crate::response::Response;
21
22/// A simple HTTP client trait for fetching web content.
23#[async_trait]
24pub trait SimpleHttpClient: Send + Sync {
25    /// Fetches the content of a URL as text.
26    async fn get_text(
27        &self,
28        url: &str,
29        timeout: Duration,
30    ) -> Result<(StatusCode, Bytes), SpiderError>;
31}
32
33/// A trait for HTTP downloaders that can fetch web pages and apply middleware
34#[async_trait]
35pub trait Downloader: Send + Sync + 'static {
36    type Client: Send + Sync;
37    /// Download a web page using the provided request.
38    /// This function focuses solely on executing the HTTP request.
39    async fn download(&self, request: Request) -> Result<Response, SpiderError>;
40    /// Returns a reference to the underlying HTTP client.
41    fn client(&self) -> &Self::Client;
42}