use std::collections::HashMap;
use std::time::Duration;
use serde_json::Value;
use tracing::{debug, error, warn};
use wreq_util::Emulation;
use crate::config::{
FetcherConfig, FetcherConfigBuilder, FollowRedirects, Impersonate, ParserConfig,
};
use crate::error::{FetchError, Result};
use crate::fingerprint::{default_user_agent, generate_headers};
use crate::proxy::{Proxy, ProxyRotator};
use crate::response::{Response, build_response_async};
/// Builds the final header map for one request.
///
/// Starts from the fetcher-level `base` headers, overlays the per-request
/// headers carried by `req`, then fills in defaults depending on the flags:
///
/// * `stealth` — adds a Google `referer` when none is present and, unless
///   browser impersonation is active, every header from
///   `generate_headers(false)` whose name is not already set.
/// * neither `stealth` nor `impersonate_enabled` — adds the default
///   `User-Agent` when the caller did not supply one.
///
/// With impersonation on and stealth off nothing is added.
///
/// Header names are compared case-insensitively, so a per-request `accept`
/// replaces a configured `Accept` instead of being sent alongside it.
fn merge_headers(
    base: &HashMap<String, String>,
    req: &RequestConfig,
    stealth: bool,
    impersonate_enabled: bool,
) -> HashMap<String, String> {
    let mut headers = base.clone();
    if let Some(req_headers) = &req.headers {
        // Drop configured headers that the request overrides under a
        // different capitalisation; a plain `extend` would keep both spellings.
        let overridden: std::collections::HashSet<String> =
            req_headers.keys().map(|k| k.to_lowercase()).collect();
        headers.retain(|name, _| !overridden.contains(&name.to_lowercase()));
        headers.extend(req_headers.iter().map(|(k, v)| (k.clone(), v.clone())));
    }

    // Lower-cased names currently in `headers`; kept in sync with every insert
    // below so later checks see headers added earlier in this function.
    let mut present: std::collections::HashSet<String> =
        headers.keys().map(|k| k.to_lowercase()).collect();

    if stealth {
        // `insert` returns true only when the name was not present yet.
        if present.insert("referer".to_owned()) {
            headers.insert("referer".into(), "https://www.google.com/".into());
        }
        if !impersonate_enabled {
            for (name, value) in generate_headers(false) {
                if present.insert(name.to_lowercase()) {
                    headers.insert(name, value);
                }
            }
        }
    } else if !impersonate_enabled && !present.contains("user-agent") {
        headers.insert("User-Agent".into(), default_user_agent());
    }
    headers
}
/// Looks up the `wreq_util` emulation profile for a browser name.
///
/// Matching ignores letter case. A bare family name (`"chrome"`, `"edge"`,
/// `"safari"`, `"firefox"`) is an alias for one specific versioned profile, as
/// listed in the table below. Names not in the table yield `None`.
fn resolve_emulation(name: &str) -> Option<Emulation> {
    let normalized = name.to_lowercase();
    let profile = match normalized.as_str() {
        // Chrome family
        "chrome" | "chrome145" => Emulation::Chrome145,
        "chrome100" => Emulation::Chrome100,
        "chrome120" => Emulation::Chrome120,
        "chrome124" => Emulation::Chrome124,
        "chrome131" => Emulation::Chrome131,
        "chrome136" => Emulation::Chrome136,
        "chrome140" => Emulation::Chrome140,
        "chrome142" => Emulation::Chrome142,
        "chrome143" => Emulation::Chrome143,
        "chrome144" => Emulation::Chrome144,
        // Edge family
        "edge" | "edge145" => Emulation::Edge145,
        "edge140" => Emulation::Edge140,
        "edge134" => Emulation::Edge134,
        // Safari family
        "safari" | "safari26" => Emulation::Safari26,
        "safari18" => Emulation::Safari18_5,
        // Firefox family
        "firefox" | "firefox135" => Emulation::Firefox135,
        "firefox133" => Emulation::Firefox133,
        "firefox128" => Emulation::Firefox128,
        _ => return None,
    };
    Some(profile)
}
/// Per-request overrides and payload.
///
/// Every field is optional; the derived `Default` leaves all of them `None`,
/// in which case the fetcher's own configuration is used where a fallback
/// exists.
#[derive(Debug, Default)]
pub struct RequestConfig {
/// Extra headers for this request, merged over the fetcher's configured headers.
pub headers: Option<HashMap<String, String>>,
/// Cookies sent as a single `cookie` header in `name=value; name=value` form.
pub cookies: Option<HashMap<String, String>>,
/// Query parameters appended to the request URL.
pub params: Option<HashMap<String, String>>,
/// Timeout in seconds; `Fetcher` falls back to the configured `timeout_secs` when unset.
pub timeout_secs: Option<u64>,
/// Redirect policy; `Fetcher` falls back to the configured `follow_redirects` when unset.
pub follow_redirects: Option<FollowRedirects>,
/// Redirect limit applied when redirects are followed; `Fetcher` falls back to the configured `max_redirects`.
pub max_redirects: Option<usize>,
/// Overrides the configured `retries` value for `Fetcher` requests.
pub retries: Option<u32>,
/// Seconds `Fetcher` sleeps between failed attempts; falls back to the configured `retry_delay_secs`.
pub retry_delay_secs: Option<u64>,
/// Explicit proxy; when set, `Fetcher` uses it instead of the proxy rotator or the configured proxy.
pub proxy: Option<Proxy>,
/// Certificate verification switch; `Fetcher` disables verification when this resolves to `false`.
pub verify: Option<bool>,
/// Browser impersonation selection; overrides the configured `impersonate`.
pub impersonate: Option<Impersonate>,
/// Overrides the configured `stealthy_headers` flag used when merging headers.
pub stealthy_headers: Option<bool>,
/// Raw request body; only used when `json` is not set.
pub data: Option<Vec<u8>>,
/// JSON body; serialized with `serde_json` and sent with `content-type: application/json`.
pub json: Option<Value>,
/// `(username, password)` pair sent as HTTP basic authentication.
pub auth: Option<(String, String)>,
}
/// Stateless HTTP fetcher.
///
/// Its request path builds a new `wreq::Client` for every attempt, so nothing
/// (cookies, connections) is carried over between calls by this type itself.
pub struct Fetcher {
/// Defaults applied whenever the matching `RequestConfig` field is unset.
config: FetcherConfig,
/// Optional rotator, asked for a proxy on each attempt when the request carries no explicit proxy.
proxy_rotator: Option<ProxyRotator>,
/// Parser settings stored via `set_parser_config`.
/// NOTE(review): not read by any method visible in this part of the file — confirm where it is consumed.
parser_config: ParserConfig,
}
impl Fetcher {
pub fn new() -> Self {
Self {
config: FetcherConfig::default(),
proxy_rotator: None,
parser_config: ParserConfig::default(),
}
}
pub fn with_config(config: FetcherConfig) -> Self {
Self {
config,
proxy_rotator: None,
parser_config: ParserConfig::default(),
}
}
pub fn builder() -> FetcherConfigBuilder {
FetcherConfigBuilder::new()
}
pub fn from_builder(builder: FetcherConfigBuilder) -> Result<Self> {
let (config, rotator) = builder.build()?;
Ok(Self {
config,
proxy_rotator: rotator,
parser_config: ParserConfig::default(),
})
}
pub fn set_proxy_rotator(&mut self, rotator: ProxyRotator) {
self.proxy_rotator = Some(rotator);
}
pub fn set_parser_config(&mut self, parser_config: ParserConfig) {
self.parser_config = parser_config;
}
pub fn config(&self) -> &FetcherConfig {
&self.config
}
pub async fn get(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
self.request("GET", url, req.unwrap_or_default()).await
}
pub async fn post(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
self.request("POST", url, req.unwrap_or_default()).await
}
pub async fn put(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
self.request("PUT", url, req.unwrap_or_default()).await
}
pub async fn delete(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
self.request("DELETE", url, req.unwrap_or_default()).await
}
async fn request(&self, method: &str, url: &str, req: RequestConfig) -> Result<Response> {
let max_retries = req.retries.unwrap_or(self.config.retries);
let retry_delay = req.retry_delay_secs.unwrap_or(self.config.retry_delay_secs);
let static_proxy = req.proxy.clone();
let mut last_error: Option<FetchError> = None;
for attempt in 0..max_retries {
let proxy = match (&self.proxy_rotator, &static_proxy) {
(Some(rotator), None) => Some(rotator.get_proxy()),
_ => static_proxy.clone().or_else(|| self.config.proxy.clone()),
};
match self
.execute_request(method, url, &req, proxy.as_ref())
.await
{
Ok(response) => return Ok(response),
Err(e) => {
match attempt < max_retries - 1 {
true => {
warn!(attempt = attempt + 1, error = %e, "request failed, retrying in {retry_delay}s");
tokio::time::sleep(Duration::from_secs(retry_delay)).await;
}
false => {
error!(attempts = max_retries, error = %e, "all retries exhausted");
}
}
last_error = Some(e);
}
}
}
Err(FetchError::MaxRetriesExceeded {
attempts: max_retries,
last_error: Box::new(last_error.unwrap_or(FetchError::Other("unknown error".into()))),
})
}
async fn execute_request(
&self,
method: &str,
url: &str,
req: &RequestConfig,
proxy: Option<&Proxy>,
) -> Result<Response> {
let stealth = req.stealthy_headers.unwrap_or(self.config.stealthy_headers);
let impersonate = req.impersonate.as_ref().unwrap_or(&self.config.impersonate);
let impersonate_selected = impersonate.select();
let timeout = req.timeout_secs.unwrap_or(self.config.timeout_secs);
let follow = req.follow_redirects.unwrap_or(self.config.follow_redirects);
let max_redirects = req.max_redirects.unwrap_or(self.config.max_redirects);
let verify = req.verify.unwrap_or(self.config.verify);
let final_headers = merge_headers(
&self.config.headers,
req,
stealth,
impersonate_selected.is_some(),
);
let mut client_builder = wreq::Client::builder().timeout(Duration::from_secs(timeout));
if !verify {
client_builder = client_builder.cert_verification(false);
}
match follow {
FollowRedirects::None => {
client_builder = client_builder.redirect(wreq::redirect::Policy::none());
}
FollowRedirects::All | FollowRedirects::Safe => {
client_builder =
client_builder.redirect(wreq::redirect::Policy::limited(max_redirects));
}
}
if let Some(p) = proxy {
let rp = wreq::Proxy::all(p.server())
.map_err(|e| FetchError::InvalidProxy(e.to_string()))?;
client_builder = client_builder.proxy(rp);
}
let client = client_builder.build()?;
let http_method: wreq::Method = method
.parse()
.map_err(|_| FetchError::Other(format!("invalid HTTP method: {method}")))?;
let mut full_url = url::Url::parse(url)?;
if let Some(params) = &req.params {
let mut pairs = full_url.query_pairs_mut();
params.iter().for_each(|(k, v)| {
pairs.append_pair(k, v);
});
}
let mut request_builder = client.request(http_method, full_url.as_str());
if let Some(browser_name) = impersonate_selected {
if let Some(emulation) = resolve_emulation(browser_name) {
request_builder = request_builder.emulation(emulation);
}
}
for (k, v) in &final_headers {
request_builder = request_builder.header(k.as_str(), v.as_str());
}
if let Some(cookies) = &req.cookies {
let cookie_str = cookies
.iter()
.map(|(k, v)| format!("{k}={v}"))
.collect::<Vec<_>>()
.join("; ");
request_builder = request_builder.header("cookie", cookie_str);
}
if let Some((user, pass)) = &req.auth {
request_builder = request_builder.basic_auth(user, Some(pass));
}
if let Some(json_body) = &req.json {
request_builder = request_builder
.header("content-type", "application/json")
.body(serde_json::to_vec(json_body)?);
} else if let Some(data) = &req.data {
request_builder = request_builder.body(data.clone());
}
let request_headers_map = final_headers;
debug!(method, url, "sending request via wreq");
let resp = request_builder.send().await?;
let mut meta = HashMap::new();
if let Some(p) = proxy {
meta.insert("proxy".to_owned(), Value::String(p.server().to_owned()));
}
build_response_async(resp, request_headers_map, method, meta).await
}
}
impl Default for Fetcher {
fn default() -> Self {
Self::new()
}
}
/// HTTP session that reuses one `wreq::Client` across requests.
///
/// The client is created by `open` and released by `close`; requests made
/// while no client is held fail with `FetchError::SessionNotActive`.
pub struct FetcherSession {
/// Session-wide settings; the client-level ones are applied when `open` builds the client.
config: FetcherConfig,
/// Rotator supplied through `with_rotator`.
/// NOTE(review): not read by any method visible in this part of the file — confirm where it is consumed.
proxy_rotator: Option<ProxyRotator>,
/// Parser settings supplied through `with_parser_config`.
/// NOTE(review): not read by any method visible in this part of the file — confirm where it is consumed.
parser_config: ParserConfig,
/// `Some` while the session is open, `None` otherwise.
client: Option<wreq::Client>,
}
impl FetcherSession {
pub fn new(config: FetcherConfig) -> Self {
Self {
config,
proxy_rotator: None,
parser_config: ParserConfig::default(),
client: None,
}
}
pub fn with_rotator(mut self, rotator: ProxyRotator) -> Self {
self.proxy_rotator = Some(rotator);
self
}
pub fn with_parser_config(mut self, parser_config: ParserConfig) -> Self {
self.parser_config = parser_config;
self
}
pub fn open(&mut self) -> Result<()> {
if self.client.is_some() {
return Err(FetchError::SessionAlreadyActive);
}
let mut builder = wreq::Client::builder()
.timeout(Duration::from_secs(self.config.timeout_secs))
.cookie_store(true);
if !self.config.verify {
builder = builder.cert_verification(false);
}
match self.config.follow_redirects {
FollowRedirects::None => {
builder = builder.redirect(wreq::redirect::Policy::none());
}
FollowRedirects::All | FollowRedirects::Safe => {
builder =
builder.redirect(wreq::redirect::Policy::limited(self.config.max_redirects));
}
}
if let Some(ref p) = self.config.proxy {
let rp = wreq::Proxy::all(p.server())
.map_err(|e| FetchError::InvalidProxy(e.to_string()))?;
builder = builder.proxy(rp);
}
self.client = Some(builder.build()?);
Ok(())
}
pub fn close(&mut self) {
self.client = None;
}
pub fn is_active(&self) -> bool {
self.client.is_some()
}
pub async fn get(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
self.request("GET", url, req.unwrap_or_default()).await
}
pub async fn post(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
self.request("POST", url, req.unwrap_or_default()).await
}
pub async fn put(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
self.request("PUT", url, req.unwrap_or_default()).await
}
pub async fn delete(&self, url: &str, req: Option<RequestConfig>) -> Result<Response> {
self.request("DELETE", url, req.unwrap_or_default()).await
}
async fn request(&self, method: &str, url: &str, req: RequestConfig) -> Result<Response> {
let client = self.client.as_ref().ok_or(FetchError::SessionNotActive)?;
let stealth = req.stealthy_headers.unwrap_or(self.config.stealthy_headers);
let impersonate = req.impersonate.as_ref().unwrap_or(&self.config.impersonate);
let impersonate_selected = impersonate.select();
let final_headers = merge_headers(
&self.config.headers,
&req,
stealth,
impersonate_selected.is_some(),
);
let http_method: wreq::Method = method
.parse()
.map_err(|_| FetchError::Other(format!("invalid HTTP method: {method}")))?;
let mut full_url = url::Url::parse(url)?;
if let Some(params) = &req.params {
let mut pairs = full_url.query_pairs_mut();
params.iter().for_each(|(k, v)| {
pairs.append_pair(k, v);
});
}
let mut request_builder = client.request(http_method, full_url.as_str());
if let Some(browser_name) = impersonate_selected {
if let Some(emulation) = resolve_emulation(browser_name) {
request_builder = request_builder.emulation(emulation);
}
}
for (k, v) in &final_headers {
request_builder = request_builder.header(k.as_str(), v.as_str());
}
if let Some(cookies) = &req.cookies {
let cookie_str = cookies
.iter()
.map(|(k, v)| format!("{k}={v}"))
.collect::<Vec<_>>()
.join("; ");
request_builder = request_builder.header("cookie", cookie_str);
}
if let Some((user, pass)) = &req.auth {
request_builder = request_builder.basic_auth(user, Some(pass));
}
if let Some(json_body) = &req.json {
request_builder = request_builder
.header("content-type", "application/json")
.body(serde_json::to_vec(json_body)?);
} else if let Some(data) = &req.data {
request_builder = request_builder.body(data.clone());
}
debug!(method, url, "sending request via wreq session");
let resp = request_builder.send().await?;
build_response_async(resp, final_headers, method, HashMap::new()).await
}
}
impl Drop for FetcherSession {
    /// Releases the shared client when the session goes out of scope, leaving
    /// the session in the same state an explicit close would.
    fn drop(&mut self) {
        self.client = None;
    }
}