use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use reqwest::Client;
use tracing::debug;
use crate::fetcher::PageFetcher;
use crate::proxy::ProxyPool;
use crate::Result;
/// User-Agent header value handed to the `reqwest` client builders in this file.
///
/// The string identifies the client as desktop Chrome 131 on macOS. The
/// trailing backslash inside the literal joins the two source lines into a
/// single header value without an embedded newline.
const DEFAULT_USER_AGENT: &str =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 \
(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
/// Page fetcher that sends every request through one stored `reqwest::Client`.
///
/// The client is fixed at construction time (see `new`, `with_proxy` and
/// `with_client`) and is reused by each `fetch` call.
pub struct HttpFetcher {
/// Client used to issue all requests made by this fetcher.
client: Client,
}
impl HttpFetcher {
    /// Creates a fetcher whose client sends `DEFAULT_USER_AGENT` and has no
    /// other options set on its builder (no explicit proxy, no timeout).
    ///
    /// # Panics
    ///
    /// Panics with the message "Failed to create HTTP client" if the
    /// underlying client cannot be built.
    pub fn new() -> Self {
        let client = Client::builder()
            .user_agent(DEFAULT_USER_AGENT)
            .build()
            .expect("Failed to create HTTP client");
        Self::with_client(client)
    }

    /// Creates a fetcher whose client is configured with
    /// `reqwest::Proxy::all(proxy_url)` and sends `DEFAULT_USER_AGENT`.
    ///
    /// # Errors
    ///
    /// Returns `SearchError::Other` when `proxy_url` is rejected by
    /// `reqwest::Proxy::all`, or when the client itself fails to build.
    pub fn with_proxy(proxy_url: &str) -> crate::Result<Self> {
        // Validate the proxy first so a bad URL is reported as a proxy error
        // rather than as a client-construction error.
        let upstream = reqwest::Proxy::all(proxy_url).map_err(|err| {
            crate::SearchError::Other(format!("Failed to create proxy: {}", err))
        })?;

        let builder = Client::builder()
            .user_agent(DEFAULT_USER_AGENT)
            .proxy(upstream);
        let client = builder.build().map_err(|err| {
            crate::SearchError::Other(format!("Failed to create HTTP client: {}", err))
        })?;

        Ok(Self::with_client(client))
    }

    /// Wraps a caller-supplied client as-is; none of this file's defaults
    /// (such as the user agent) are applied to it.
    pub fn with_client(client: Client) -> Self {
        Self { client }
    }

    /// Returns a shared reference to the client this fetcher sends requests with.
    pub fn client(&self) -> &Client {
        &self.client
    }
}
impl Default for HttpFetcher {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl PageFetcher for HttpFetcher {
    /// Sends a GET request for `url` through the stored client and returns the
    /// response body decoded as text.
    ///
    /// The response status code is not inspected here: whatever body the
    /// server sends back is returned.
    ///
    /// # Errors
    ///
    /// Propagates (via `?`) any error raised while sending the request or
    /// while reading the body.
    async fn fetch(&self, url: &str) -> Result<String> {
        let body = self.client.get(url).send().await?.text().await?;
        Ok(body)
    }
}
/// Page fetcher that asks a shared `ProxyPool` for a proxy on every fetch and
/// builds a new HTTP client for that call.
pub struct PooledHttpFetcher {
/// Shared pool queried with `get_proxy()` at the start of each fetch.
pool: Arc<ProxyPool>,
/// Timeout passed to the builder of every client this fetcher creates
/// (30 seconds unless changed through `with_timeout`).
timeout: Duration,
}
impl PooledHttpFetcher {
    /// Creates a fetcher that draws proxies from `pool` and uses a 30-second
    /// timeout for the clients it builds.
    pub fn new(pool: Arc<ProxyPool>) -> Self {
        let timeout = Duration::from_secs(30);
        Self { pool, timeout }
    }

    /// Consumes the fetcher and returns it with `timeout` replacing the
    /// previous value; the pool is carried over unchanged.
    pub fn with_timeout(self, timeout: Duration) -> Self {
        Self { timeout, ..self }
    }
}
#[async_trait]
impl PageFetcher for PooledHttpFetcher {
    /// Fetches `url` with a client that is built for this call only.
    ///
    /// The pool is asked for a proxy first. When it returns one, the choice is
    /// logged at debug level and the client is configured with
    /// `reqwest::Proxy::all` for that proxy's URL; when it returns `None`, the
    /// client is built with no explicit proxy. In both cases the client sends
    /// `DEFAULT_USER_AGENT` and uses this fetcher's timeout.
    ///
    /// The response status code is not inspected; the body is returned as text.
    ///
    /// # Errors
    ///
    /// Returns `SearchError::Other` if the proxy URL is rejected or the client
    /// cannot be built, and propagates any error from sending the request or
    /// reading the body.
    async fn fetch(&self, url: &str) -> Result<String> {
        // Resolve the optional proxy up front so the client is assembled by a
        // single builder chain below.
        let selected_proxy = match self.pool.get_proxy().await {
            Some(proxy_config) => {
                debug!(
                    "PooledHttpFetcher using proxy {}:{}",
                    proxy_config.host, proxy_config.port
                );
                let proxy = reqwest::Proxy::all(proxy_config.url()).map_err(|err| {
                    crate::SearchError::Other(format!("Failed to create proxy: {}", err))
                })?;
                Some(proxy)
            }
            None => None,
        };

        let mut builder = Client::builder()
            .user_agent(DEFAULT_USER_AGENT)
            .timeout(self.timeout);
        if let Some(proxy) = selected_proxy {
            builder = builder.proxy(proxy);
        }
        let client = builder.build().map_err(|err| {
            crate::SearchError::Other(format!("Failed to create HTTP client: {}", err))
        })?;

        let body = client.get(url).send().await?.text().await?;
        Ok(body)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::proxy::ProxyConfig;

    /// Builds the empty, shareable pool used by several tests below.
    fn empty_pool() -> Arc<ProxyPool> {
        Arc::new(ProxyPool::new())
    }

    /// `new` must complete without panicking.
    #[test]
    fn test_http_fetcher_new() {
        let _ = HttpFetcher::new();
    }

    /// `Default` must complete without panicking.
    #[test]
    fn test_http_fetcher_default() {
        let _: HttpFetcher = Default::default();
    }

    /// A caller-built client can be wrapped.
    #[test]
    fn test_http_fetcher_with_client() {
        let custom = Client::builder().user_agent("test-agent").build().unwrap();
        let _ = HttpFetcher::with_client(custom);
    }

    /// An empty proxy URL is rejected.
    #[test]
    fn test_http_fetcher_with_proxy_invalid() {
        assert!(HttpFetcher::with_proxy("").is_err());
    }

    /// A plain `http://` proxy URL is accepted.
    #[test]
    fn test_http_fetcher_with_proxy_valid() {
        assert!(HttpFetcher::with_proxy("http://127.0.0.1:8080").is_ok());
    }

    /// A `socks5://` proxy URL is accepted.
    #[test]
    fn test_http_fetcher_with_proxy_socks5() {
        assert!(HttpFetcher::with_proxy("socks5://127.0.0.1:1080").is_ok());
    }

    /// The client accessor can be called on a fresh fetcher.
    #[test]
    fn test_http_fetcher_client_accessor() {
        let fetcher = HttpFetcher::new();
        let _: &Client = fetcher.client();
    }

    /// A pooled fetcher can be created from an empty pool.
    #[test]
    fn test_pooled_http_fetcher_new() {
        let _ = PooledHttpFetcher::new(empty_pool());
    }

    /// `with_timeout` overrides the stored timeout.
    #[test]
    fn test_pooled_http_fetcher_with_timeout() {
        let wanted = Duration::from_secs(15);
        let fetcher = PooledHttpFetcher::new(empty_pool()).with_timeout(wanted);
        assert_eq!(fetcher.timeout, Duration::from_secs(15));
    }

    /// The timeout is 30 seconds when not overridden.
    #[test]
    fn test_pooled_http_fetcher_default_timeout() {
        let fetcher = PooledHttpFetcher::new(empty_pool());
        assert_eq!(fetcher.timeout, Duration::from_secs(30));
    }

    /// A pooled fetcher can be created from a pool that already holds proxies.
    #[test]
    fn test_pooled_http_fetcher_with_proxies() {
        let proxies = vec![
            ProxyConfig::new("127.0.0.1", 8080),
            ProxyConfig::new("127.0.0.1", 8081),
        ];
        let pool = Arc::new(ProxyPool::with_proxies(proxies));
        let _ = PooledHttpFetcher::new(pool);
    }
}