use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use futures::StreamExt;
use reqwest::Client as ReqwestClient;
use url::Url;
/// Backend used to download the content of a page.
///
/// Every variant is driven through [`HttpClient::fetch`], which returns the
/// page body/markup as a `String`.
#[derive(Debug, Clone)]
pub enum HttpClient {
/// Issues a GET request with `reqwest` and returns the response body text.
Reqwest(ReqwestClient),
/// Navigates a `fantoccini` client to the URL and returns the page source.
Fantoccini(fantoccini::Client),
/// Opens a new page in a shared `chromiumoxide` browser for each fetch.
/// The `Duration` is slept after opening the page and before its content
/// is read; a zero duration skips the sleep.
Chromium(Arc<chromiumoxide::Browser>, Duration),
}
impl Default for HttpClient {
fn default() -> Self {
Self::Reqwest(
ReqwestClient::builder()
.user_agent(format!("mq crawler/0.1 ({})", env!("CARGO_PKG_HOMEPAGE")))
.build()
.expect("Failed to build default reqwest client"),
)
}
}
impl HttpClient {
pub fn new_reqwest(timeout: f64) -> Result<Self, String> {
let client = ReqwestClient::builder()
.user_agent(format!("mq crawler/0.1 ({})", env!("CARGO_PKG_HOMEPAGE")))
.pool_max_idle_per_host(3)
.pool_idle_timeout(Duration::from_secs(90))
.timeout(Duration::from_secs(timeout as u64))
.connect_timeout(Duration::from_secs(10))
.tcp_keepalive(Duration::from_secs(120))
.build()
.map_err(|e| format!("Failed to build reqwest client: {}", e))?;
Ok(Self::Reqwest(client))
}
pub fn new_reqwest_multi_domain(timeout: f64, max_idle_per_host: usize) -> Result<Self, String> {
let client = ReqwestClient::builder()
.user_agent(format!("mq crawler/0.1 ({})", env!("CARGO_PKG_HOMEPAGE")))
.pool_max_idle_per_host(max_idle_per_host)
.pool_idle_timeout(Duration::from_secs(90))
.timeout(Duration::from_secs(timeout as u64))
.connect_timeout(Duration::from_secs(10))
.tcp_keepalive(Duration::from_secs(120))
.build()
.map_err(|e| format!("Failed to build reqwest client: {}", e))?;
Ok(Self::Reqwest(client))
}
pub async fn new_chromium(chrome_path: Option<PathBuf>, headless_wait: Duration) -> Result<Self, String> {
let mut config_builder = chromiumoxide::browser::BrowserConfig::builder().arg("--disable-gpu");
if let Some(path) = chrome_path {
config_builder = config_builder.chrome_executable(path);
}
let config = config_builder
.build()
.map_err(|e| format!("Failed to build Chrome config: {}", e))?;
let (browser, mut handler) = chromiumoxide::Browser::launch(config)
.await
.map_err(|e| format!("Failed to launch Chrome: {}", e))?;
tokio::spawn(async move {
while let Some(h) = handler.next().await {
if let Err(e) = h {
tracing::debug!("Browser handler event error: {}", e);
}
}
});
Ok(Self::Chromium(Arc::new(browser), headless_wait))
}
pub async fn fetch(&self, url: Url) -> Result<String, String> {
match self {
HttpClient::Reqwest(client) => {
let response = client
.get(url.clone())
.send()
.await
.map_err(|e| format!("Failed to fetch URL {}: {}", url, e))?;
if response.status().is_success() {
response
.text()
.await
.map_err(|e| format!("Failed to read response text: {}", e))
} else {
Err(format!("Request to {} failed with status: {}", url, response.status()))
}
}
HttpClient::Fantoccini(client) => {
let url_str = url.as_str();
client
.goto(url_str)
.await
.map_err(|e| format!("Fantoccini failed to navigate to {}: {}", url, e))?;
let page_source = client
.source()
.await
.map_err(|e| format!("Fantoccini failed to get page source: {}", e))?;
Ok(page_source)
}
HttpClient::Chromium(browser, wait_duration) => {
let page = browser
.new_page(url.as_str())
.await
.map_err(|e| format!("Chrome failed to open page {}: {}", url, e))?;
if !wait_duration.is_zero() {
tokio::time::sleep(*wait_duration).await;
}
let content = page
.content()
.await
.map_err(|e| format!("Chrome failed to get content from {}: {}", url, e))?;
page.close()
.await
.map_err(|e| format!("Chrome failed to close page: {}", e))?;
Ok(content)
}
}
}
}
#[cfg(test)]
mod tests {
    use super::HttpClient;

    /// `HttpClient::default()` must select the reqwest backend.
    #[test]
    fn test_default_client_creation() {
        let is_reqwest = matches!(HttpClient::default(), HttpClient::Reqwest(_));
        assert!(is_reqwest);
    }

    /// `HttpClient::new_reqwest` must succeed for a 30 second timeout and
    /// select the reqwest backend.
    #[test]
    fn test_new_reqwest_client() {
        let built = HttpClient::new_reqwest(30.0).unwrap();
        let is_reqwest = matches!(built, HttpClient::Reqwest(_));
        assert!(is_reqwest);
    }
}