spider_lib/downloader.rs
//! Traits for defining and implementing HTTP downloaders in `spider-lib`.
//!
//! This module provides the foundational traits for handling HTTP requests and
//! responses within the web crawling framework.
//!
//! - The `Downloader` trait defines the interface for components responsible
//!   for executing web requests and producing `Response` objects. Implementations
//!   of this trait typically wrap HTTP client libraries (e.g., `reqwest`).
//! - The `SimpleHttpClient` trait offers a basic, generic interface for
//!   performing simple GET requests, primarily used for internal utility
//!   functions or when a full `Request` object is not necessary.
12
13use async_trait::async_trait;
14use bytes::Bytes;
15use http::StatusCode;
16use std::time::Duration;
17
18use crate::error::SpiderError;
19use crate::request::Request;
20use crate::response::Response;
21
22/// A simple HTTP client trait for fetching web content.
23#[async_trait]
24pub trait SimpleHttpClient: Send + Sync {
25 /// Fetches the content of a URL as text.
26 async fn get_text(
27 &self,
28 url: &str,
29 timeout: Duration,
30 ) -> Result<(StatusCode, Bytes), SpiderError>;
31}
32
33/// A trait for HTTP downloaders that can fetch web pages and apply middleware
34#[async_trait]
35pub trait Downloader: Send + Sync + 'static {
36 type Client: Send + Sync;
37 /// Download a web page using the provided request.
38 /// This function focuses solely on executing the HTTP request.
39 async fn download(&self, request: Request) -> Result<Response, SpiderError>;
40 /// Returns a reference to the underlying HTTP client.
41 fn client(&self) -> &Self::Client;
42}