use std::collections::BTreeMap;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use crate::ban;
use crate::browser::BrowserBackend;
use crate::check::UncertainReason;
use crate::site::HttpMethod;
/// Duration (60 seconds) handed to the browser backend on every `BrowserFetcher` fetch.
// NOTE(review): presumably the per-page navigation/render timeout — confirm against the
// `BrowserBackend` implementation, which is not visible from this file.
pub(crate) const BROWSER_TIMEOUT: Duration = Duration::from_secs(60);
/// Borrowed description of a single fetch, passed to [`Fetcher::fetch`].
pub(crate) struct FetchRequest<'a> {
/// Method requested by the caller; the HTTP-based fetchers map it to the verb on the wire.
pub method: HttpMethod,
/// Target URL as text. The browser fetcher parses it and rejects it if parsing fails.
pub url: &'a str,
/// Request payload. The HTTP-based fetchers only send it for `HttpMethod::Post`.
pub body: Option<&'a str>,
/// User-Agent to send, unless `headers` already contains a `User-Agent` entry
/// (compared case-insensitively).
pub user_agent: Option<&'a str>,
/// Extra request headers, applied as-is.
pub headers: &'a BTreeMap<String, String>,
/// Whether the response body is needed. When `false`, the HTTP-based fetchers issue a
/// `Get` as `HEAD` (retrying as `GET` on 405) and return an empty body.
pub want_body: bool,
}
/// Outcome of a successful fetch.
pub(crate) struct FetchResponse {
/// Numeric HTTP status code of the response.
pub status: u16,
/// URL reported by the response/page itself, rendered as a string.
pub final_url: String,
/// Response body text. The HTTP-based fetchers leave it empty when the request did not
/// ask for a body.
pub body: String,
}
/// Failure of a fetch, wrapping the [`UncertainReason`] that explains why no usable
/// response was produced (network error, body read error, ban-like response, ...).
pub(crate) struct FetchError(pub UncertainReason);
/// Common interface over the transport strategies defined in this file.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub(crate) trait Fetcher: Send + Sync {
/// Executes `req` and returns the response, or a [`FetchError`] carrying the reason the
/// fetch did not yield a usable result.
async fn fetch(&self, req: &FetchRequest<'_>) -> Result<FetchResponse, FetchError>;
}
/// [`Fetcher`] backed by a plain `reqwest::Client`.
pub(crate) struct HttpFetcher {
// Client used for every request issued by this fetcher.
inner: reqwest::Client,
}
impl HttpFetcher {
pub(crate) fn new(inner: reqwest::Client) -> Self {
Self { inner }
}
pub(crate) fn client(&self) -> &reqwest::Client {
&self.inner
}
}
#[async_trait]
impl Fetcher for HttpFetcher {
    /// Performs `req` over the wrapped `reqwest::Client`.
    ///
    /// Method selection:
    /// * `Post` sends a `POST` carrying `req.body`.
    /// * `Get` with `want_body` sends a `GET`.
    /// * `Get` without `want_body` sends a `HEAD`; if (and only if) that is answered with
    ///   status 405, the request is repeated once as a `GET`.
    ///
    /// # Errors
    ///
    /// * `UncertainReason::Network` when sending fails.
    /// * Whatever `ban::detect_pre_body` reports for the status and headers.
    /// * `UncertainReason::BodyRead` when the body was requested but could not be read.
    /// * Whatever `ban::detect_in_body` reports for a non-empty body.
    async fn fetch(&self, req: &FetchRequest<'_>) -> Result<FetchResponse, FetchError> {
        // Decide the wire verb, the payload, and whether a rejected HEAD may be retried.
        let (verb, payload, head_probe) = match req.method {
            HttpMethod::Post => (reqwest::Method::POST, req.body, false),
            HttpMethod::Get if req.want_body => (reqwest::Method::GET, None, false),
            HttpMethod::Get => (reqwest::Method::HEAD, None, true),
        };

        let mut outcome = send(
            &self.inner,
            verb,
            req.url,
            req.user_agent,
            req.headers,
            payload,
        )
        .await;

        // Only a 405 answer to the HEAD probe triggers the GET retry.
        if head_probe && matches!(&outcome, Ok(r) if r.status().as_u16() == 405) {
            outcome = send(
                &self.inner,
                reqwest::Method::GET,
                req.url,
                req.user_agent,
                req.headers,
                None,
            )
            .await;
        }

        let response = outcome.map_err(|err| {
            tracing::debug!(url = %req.url, error = %err, "request failed");
            FetchError(UncertainReason::Network(err.to_string()))
        })?;

        // Capture these before `text()` consumes the response.
        let status = response.status().as_u16();
        let final_url = response.url().to_string();

        if let Some(reason) = ban::detect_pre_body(status, response.headers()) {
            tracing::warn!(url = %req.url, status, %reason, "ban-like response");
            return Err(FetchError(reason));
        }

        let body = if req.want_body {
            response
                .text()
                .await
                .map_err(|err| FetchError(UncertainReason::BodyRead(err.to_string())))?
        } else {
            String::new()
        };

        // An empty body is never inspected for ban markers.
        let body_verdict = if body.is_empty() {
            None
        } else {
            ban::detect_in_body(&body)
        };
        if let Some(reason) = body_verdict {
            tracing::warn!(url = %req.url, %reason, "ban-like body");
            return Err(FetchError(reason));
        }

        Ok(FetchResponse {
            status,
            final_url,
            body,
        })
    }
}
/// [`Fetcher`] that delegates page loading to a shared [`BrowserBackend`].
pub(crate) struct BrowserFetcher {
// Shared, dynamically dispatched backend that performs the actual page fetch.
backend: Arc<dyn BrowserBackend>,
}
impl BrowserFetcher {
pub(crate) fn new(backend: Arc<dyn BrowserBackend>) -> Self {
Self { backend }
}
}
#[async_trait]
impl Fetcher for BrowserFetcher {
    /// Loads `req.url` through the browser backend.
    ///
    /// Only `req.url` and `req.headers` are forwarded (together with [`BROWSER_TIMEOUT`]);
    /// `method`, `body`, `user_agent` and `want_body` are not consulted here, and no ban
    /// detection is applied to the returned page.
    ///
    /// # Errors
    ///
    /// * `UncertainReason::Other` when `req.url` does not parse as a URL.
    /// * `UncertainReason::BrowserFailed` when the backend reports an error.
    async fn fetch(&self, req: &FetchRequest<'_>) -> Result<FetchResponse, FetchError> {
        let target = url::Url::parse(req.url)
            .map_err(|err| FetchError(UncertainReason::Other(format!("invalid url: {err}"))))?;

        let page = self
            .backend
            .fetch(&target, req.headers, BROWSER_TIMEOUT)
            .await
            .map_err(|err| {
                tracing::warn!(url = %req.url, error = %err, "browser fetch failed");
                FetchError(UncertainReason::BrowserFailed(err.to_string()))
            })?;

        Ok(FetchResponse {
            status: page.status,
            final_url: page.final_url.as_str().to_owned(),
            body: page.body,
        })
    }
}
/// Builds and sends one request on `client`.
///
/// Headers are applied in this order: the fallback `User-Agent` (`ua`), then every entry of
/// `headers`, then — only when a `body` is present — a fallback
/// `Content-Type: application/json`. Each fallback is skipped when `headers` already holds
/// an entry with that name (ASCII case-insensitive comparison).
///
/// Returns whatever `reqwest` reports for the send.
async fn send(
    client: &reqwest::Client,
    method: reqwest::Method,
    url: &str,
    ua: Option<&str>,
    headers: &BTreeMap<String, String>,
    body: Option<&str>,
) -> reqwest::Result<reqwest::Response> {
    // True when the caller-supplied headers already define `name`.
    let caller_sets = |name: &str| headers.keys().any(|key| key.eq_ignore_ascii_case(name));

    let mut builder = client.request(method, url);

    if let Some(agent) = ua.filter(|_| !caller_sets("user-agent")) {
        builder = builder.header(reqwest::header::USER_AGENT, agent);
    }

    builder = headers
        .iter()
        .fold(builder, |acc, (name, value)| acc.header(name, value));

    if let Some(payload) = body {
        if !caller_sets("content-type") {
            builder = builder.header(reqwest::header::CONTENT_TYPE, "application/json");
        }
        builder = builder.body(payload.to_owned());
    }

    builder.send().await
}
#[cfg(feature = "impersonate")]
pub(crate) use impersonate::ImpersonateFetcher;
#[cfg(feature = "impersonate")]
mod impersonate {
    use super::{
        FetchError, FetchRequest, FetchResponse, Fetcher, HttpMethod, UncertainReason, ban,
    };
    use async_trait::async_trait;
    use std::collections::BTreeMap;

    /// Emulation profile the `wreq` client is built with.
    const EMULATION: wreq_util::Emulation = wreq_util::Emulation::Chrome134;

    /// [`Fetcher`] backed by a `wreq::Client` configured with [`EMULATION`].
    pub(crate) struct ImpersonateFetcher {
        // Client used for every request issued by this fetcher.
        inner: wreq::Client,
    }

    impl ImpersonateFetcher {
        /// Builds the emulating client.
        ///
        /// # Errors
        ///
        /// Returns `Error::HttpSetup` when the `wreq` client cannot be constructed.
        pub(crate) fn new() -> crate::error::Result<Self> {
            let built = wreq::Client::builder().emulation(EMULATION).build();
            let inner = built.map_err(|e| crate::error::Error::HttpSetup {
                message: format!("wreq client init: {e}"),
            })?;
            Ok(ImpersonateFetcher { inner })
        }
    }

    #[async_trait]
    impl Fetcher for ImpersonateFetcher {
        /// Performs `req` over the emulating `wreq::Client`.
        ///
        /// Method selection:
        /// * `Post` sends a `POST` carrying `req.body`.
        /// * `Get` with `want_body` sends a `GET`.
        /// * `Get` without `want_body` sends a `HEAD`; if (and only if) that is answered
        ///   with status 405, the request is repeated once as a `GET`.
        ///
        /// # Errors
        ///
        /// * `UncertainReason::Network` when sending fails.
        /// * Whatever `ban::detect_pre_body` reports for the status and headers.
        /// * `UncertainReason::BodyRead` when the body was requested but could not be read.
        /// * Whatever `ban::detect_in_body` reports for a non-empty body.
        async fn fetch(&self, req: &FetchRequest<'_>) -> Result<FetchResponse, FetchError> {
            // Decide the wire verb, the payload, and whether a rejected HEAD may be retried.
            let (verb, payload, head_probe) = match req.method {
                HttpMethod::Post => (wreq::Method::POST, req.body, false),
                HttpMethod::Get if req.want_body => (wreq::Method::GET, None, false),
                HttpMethod::Get => (wreq::Method::HEAD, None, true),
            };

            let mut outcome = send(
                &self.inner,
                verb,
                req.url,
                req.user_agent,
                req.headers,
                payload,
            )
            .await;

            // Only a 405 answer to the HEAD probe triggers the GET retry.
            if head_probe && matches!(&outcome, Ok(r) if r.status().as_u16() == 405) {
                outcome = send(
                    &self.inner,
                    wreq::Method::GET,
                    req.url,
                    req.user_agent,
                    req.headers,
                    None,
                )
                .await;
            }

            let response = outcome.map_err(|err| {
                tracing::debug!(url = %req.url, error = %err, "impersonate request failed");
                FetchError(UncertainReason::Network(err.to_string()))
            })?;

            // Capture these before `text()` consumes the response.
            let status = response.status().as_u16();
            let final_url = response.url().to_string();

            if let Some(reason) = ban::detect_pre_body(status, response.headers()) {
                tracing::warn!(url = %req.url, status, %reason, "ban-like response");
                return Err(FetchError(reason));
            }

            let body = if req.want_body {
                response
                    .text()
                    .await
                    .map_err(|err| FetchError(UncertainReason::BodyRead(err.to_string())))?
            } else {
                String::new()
            };

            // An empty body is never inspected for ban markers.
            let body_verdict = if body.is_empty() {
                None
            } else {
                ban::detect_in_body(&body)
            };
            if let Some(reason) = body_verdict {
                tracing::warn!(url = %req.url, %reason, "ban-like body");
                return Err(FetchError(reason));
            }

            Ok(FetchResponse {
                status,
                final_url,
                body,
            })
        }
    }

    /// Builds and sends one request on the `wreq` client.
    ///
    /// Headers are applied in this order: the fallback `User-Agent` (`ua`), then every
    /// entry of `headers`, then — only when a `body` is present — a fallback
    /// `Content-Type: application/json`. Each fallback is skipped when `headers` already
    /// holds an entry with that name (ASCII case-insensitive comparison).
    async fn send(
        client: &wreq::Client,
        method: wreq::Method,
        url: &str,
        ua: Option<&str>,
        headers: &BTreeMap<String, String>,
        body: Option<&str>,
    ) -> wreq::Result<wreq::Response> {
        // True when the caller-supplied headers already define `name`.
        let caller_sets = |name: &str| headers.keys().any(|key| key.eq_ignore_ascii_case(name));

        let mut builder = client.request(method, url);

        if let Some(agent) = ua.filter(|_| !caller_sets("user-agent")) {
            builder = builder.header(wreq::header::USER_AGENT, agent);
        }

        builder = headers
            .iter()
            .fold(builder, |acc, (name, value)| acc.header(name, value));

        if let Some(payload) = body {
            if !caller_sets("content-type") {
                builder = builder.header(wreq::header::CONTENT_TYPE, "application/json");
            }
            builder = builder.body(payload.to_owned());
        }

        builder.send().await
    }
}