Skip to main content

spider_util/
http_client.rs

//! HTTP client trait for fetching web content.
//!
//! This module provides the `HttpClient` trait, which is a simple abstraction
//! for HTTP clients used throughout the spider framework.
5
6use async_trait::async_trait;
7use bytes::Bytes;
8use http::StatusCode;
9use std::time::Duration;
10
11use crate::error::SpiderError;
12
13/// A simple HTTP client trait for fetching web content.
14#[async_trait]
15pub trait HttpClient: Send + Sync {
16    /// Fetches the content of a URL as text.
17    ///
18    /// # Errors
19    ///
20    /// Returns an error when the request fails, times out, or the response body
21    /// cannot be read.
22    async fn get_text(
23        &self,
24        url: &str,
25        timeout: Duration,
26    ) -> Result<(StatusCode, Bytes), SpiderError>;
27}
28
29#[async_trait]
30impl HttpClient for reqwest::Client {
31    async fn get_text(
32        &self,
33        url: &str,
34        timeout: Duration,
35    ) -> Result<(StatusCode, Bytes), SpiderError> {
36        let resp = self.get(url).timeout(timeout).send().await?;
37        let status = resp.status();
38        let body = resp.bytes().await?;
39        Ok((status, body))
40    }
41}