use std::collections::BTreeMap;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use crate::ban;
use crate::browser::BrowserBackend;
use crate::check::UncertainReason;
use crate::site::HttpMethod;
/// Timeout value (60 seconds) that `BrowserFetcher::fetch` passes to the
/// browser backend for each page load.
pub(crate) const BROWSER_TIMEOUT: Duration = Duration::from_secs(60);
/// Borrowed description of one fetch, accepted by every [`Fetcher`].
///
/// All string and map data is borrowed for `'a`; the struct owns nothing
/// except the `method` value and the `want_body` flag.
pub(crate) struct FetchRequest<'a> {
/// HTTP method. `HttpFetcher` sends `Post` as a POST; `Get` becomes a GET
/// when `want_body` is set and a HEAD probe otherwise.
pub method: HttpMethod,
/// Target URL as text. `BrowserFetcher` parses it with `url::Url::parse`
/// and reports a parse failure as an error.
pub url: &'a str,
/// Optional request payload. `HttpFetcher` only forwards it for `Post`,
/// where it is sent with `Content-Type: application/json`.
pub body: Option<&'a str>,
/// Optional `User-Agent` header value applied by `HttpFetcher`.
pub user_agent: Option<&'a str>,
/// Extra headers. `BrowserFetcher` hands these to the browser backend.
/// NOTE(review): `HttpFetcher::fetch` in this file never reads this field —
/// confirm that omission is intentional.
pub headers: &'a BTreeMap<String, String>,
/// Whether the caller needs the response body. When `false`, `HttpFetcher`
/// skips reading the body and returns an empty string instead.
pub want_body: bool,
}
/// Owned result of a successful fetch.
pub(crate) struct FetchResponse {
/// Numeric HTTP status code of the response.
pub status: u16,
/// URL reported by the response itself (`response.url()` for `HttpFetcher`,
/// `page.final_url` for `BrowserFetcher`), rendered as a string.
pub final_url: String,
/// Response body text. `HttpFetcher` leaves this empty when the request
/// did not set `want_body`.
pub body: String,
}
/// Failure of a fetch; a newtype around the [`UncertainReason`] that explains
/// why no usable response was produced.
pub(crate) struct FetchError(pub UncertainReason);
/// Common interface over the transports that can retrieve a page.
///
/// Implemented in this file by `HttpFetcher` (backed by `reqwest`) and
/// `BrowserFetcher` (backed by a `BrowserBackend`). Implementors must be
/// `Send + Sync`.
#[async_trait]
pub(crate) trait Fetcher: Send + Sync {
/// Performs the request described by `req`.
///
/// Returns the response on success, or a [`FetchError`] wrapping the
/// `UncertainReason` for the failure.
async fn fetch(&self, req: &FetchRequest<'_>) -> Result<FetchResponse, FetchError>;
}
/// [`Fetcher`] implementation that issues requests through a `reqwest::Client`.
pub(crate) struct HttpFetcher {
// Client used for every request made by this fetcher.
inner: reqwest::Client,
}
impl HttpFetcher {
pub(crate) fn new(inner: reqwest::Client) -> Self {
Self { inner }
}
pub(crate) fn client(&self) -> &reqwest::Client {
&self.inner
}
}
#[async_trait]
impl Fetcher for HttpFetcher {
    /// Performs `req` over plain HTTP and screens the response with the
    /// `ban` module's detectors.
    ///
    /// Method selection:
    /// * `Post` — POST carrying `req.body`.
    /// * `Get` with `want_body` — GET.
    /// * `Get` without `want_body` — HEAD first; if the answer is status 405,
    ///   the request is repeated as a GET.
    ///
    /// The body is only read when `req.want_body` is set; otherwise the
    /// returned `body` is an empty string.
    ///
    /// # Errors
    /// * `UncertainReason::Network` when sending the request fails.
    /// * Whatever `ban::detect_pre_body` returns for the status and headers.
    /// * `UncertainReason::BodyRead` when reading the body text fails.
    /// * Whatever `ban::detect_in_body` returns for a non-empty body.
    async fn fetch(&self, req: &FetchRequest<'_>) -> Result<FetchResponse, FetchError> {
        let client = &self.inner;
        let (url, agent) = (req.url, req.user_agent);

        let sent = match (&req.method, req.want_body) {
            (HttpMethod::Post, _) => {
                send(client, reqwest::Method::POST, url, agent, req.body).await
            }
            (HttpMethod::Get, true) => {
                send(client, reqwest::Method::GET, url, agent, None).await
            }
            (HttpMethod::Get, false) => {
                // Cheap HEAD probe first; only a 405 answer triggers the GET retry.
                let probe = send(client, reqwest::Method::HEAD, url, agent, None).await;
                let head_rejected = matches!(&probe, Ok(r) if r.status().as_u16() == 405);
                if head_rejected {
                    send(client, reqwest::Method::GET, url, agent, None).await
                } else {
                    probe
                }
            }
        };

        let response = sent.map_err(|err| {
            tracing::debug!(url = %req.url, error = %err, "request failed");
            FetchError(UncertainReason::Network(err.to_string()))
        })?;

        // Capture these before `text()` consumes the response.
        let status = response.status().as_u16();
        let final_url = response.url().to_string();

        if let Some(reason) = ban::detect_pre_body(status, response.headers()) {
            tracing::warn!(url = %req.url, status, %reason, "ban-like response");
            return Err(FetchError(reason));
        }

        let body = if req.want_body {
            response
                .text()
                .await
                .map_err(|err| FetchError(UncertainReason::BodyRead(err.to_string())))?
        } else {
            String::new()
        };

        // An empty body is never passed to the body detector.
        let body_ban = if body.is_empty() {
            None
        } else {
            ban::detect_in_body(&body)
        };
        if let Some(reason) = body_ban {
            tracing::warn!(url = %req.url, %reason, "ban-like body");
            return Err(FetchError(reason));
        }

        Ok(FetchResponse {
            status,
            final_url,
            body,
        })
    }
}
/// [`Fetcher`] implementation that delegates page loads to a shared
/// `BrowserBackend`.
pub(crate) struct BrowserFetcher {
// Reference-counted handle to the backend that performs the fetch.
backend: Arc<dyn BrowserBackend>,
}
impl BrowserFetcher {
pub(crate) fn new(backend: Arc<dyn BrowserBackend>) -> Self {
Self { backend }
}
}
#[async_trait]
impl Fetcher for BrowserFetcher {
    /// Loads `req.url` through the browser backend.
    ///
    /// Only `req.url` and `req.headers` are consulted; `method`, `body`,
    /// `user_agent` and `want_body` are not read here. The backend is called
    /// with `BROWSER_TIMEOUT`.
    ///
    /// # Errors
    /// * `UncertainReason::Other` when `req.url` does not parse as a URL.
    /// * `UncertainReason::BrowserFailed` when the backend reports an error.
    async fn fetch(&self, req: &FetchRequest<'_>) -> Result<FetchResponse, FetchError> {
        let target = url::Url::parse(req.url)
            .map_err(|err| FetchError(UncertainReason::Other(format!("invalid url: {err}"))))?;

        let outcome = self
            .backend
            .fetch(&target, req.headers, BROWSER_TIMEOUT)
            .await;

        outcome
            .map(|page| FetchResponse {
                status: page.status,
                final_url: page.final_url.as_str().to_owned(),
                body: page.body,
            })
            .map_err(|err| {
                tracing::warn!(url = %req.url, error = %err, "browser fetch failed");
                FetchError(UncertainReason::BrowserFailed(err.to_string()))
            })
    }
}
/// Builds and sends one request with `client`.
///
/// * `ua` — when present, set as the `User-Agent` header.
/// * `body` — when present, copied into the request as its payload and
///   labelled `Content-Type: application/json`.
///
/// Returns whatever `reqwest` reports for the send.
async fn send(
    client: &reqwest::Client,
    method: reqwest::Method,
    url: &str,
    ua: Option<&str>,
    body: Option<&str>,
) -> reqwest::Result<reqwest::Response> {
    let builder = client.request(method, url);

    let builder = match ua {
        Some(agent) => builder.header(reqwest::header::USER_AGENT, agent),
        None => builder,
    };

    let builder = match body {
        Some(payload) => builder
            .header(reqwest::header::CONTENT_TYPE, "application/json")
            .body(payload.to_owned()),
        None => builder,
    };

    builder.send().await
}