use super::{ForgeApi, error};
use crate::constants::USER_AGENT;
use texting_robots::Robot as Restrictions;
use url::Url;
/// Robots.txt access rules for one upstream, bundled with the HTTP client used to talk to it.
pub struct RobotsTxt {
/// Parsed robots.txt rules; `None` means no restrictions are enforced (see `is_restricted`).
restrictions: Option<Restrictions>,
/// HTTP client supplied at construction time and exposed through `client()`.
client: reqwest::Client,
}
impl RobotsTxt {
    /// Returns the HTTP client this instance was constructed with.
    pub(crate) const fn client(&self) -> &reqwest::Client {
        &self.client
    }

    /// Builds a [`RobotsTxt`] for `base_url`, downloading and parsing `/robots.txt` if needed.
    ///
    /// This constructor is infallible: when the download fails, or no usable rules are
    /// found, the result carries no restrictions and a warning is printed instead.
    ///
    /// * `client` - HTTP client used for the download; it is stored in the returned value.
    /// * `api` - forge API handle; when it holds an access token the download is skipped.
    /// * `base_url` - upstream URL whose `/robots.txt` is consulted.
    pub async fn fetch_from<F: ForgeApi>(
        client: reqwest::Client,
        api: &F,
        base_url: &Url,
    ) -> Self {
        match Self::_fetch(&client, api, base_url).await {
            Ok(Some(restrictions)) => Self::new(client, restrictions),
            Ok(None) => Self::empty(client),
            Err(err) => {
                println!(
                    "↘⚠️ Warning: Network error while fetching robots.txt. Assuming unrestricted. Error: {err}"
                );
                Self::empty(client)
            }
        }
    }

    /// Returns `true` when the loaded rules forbid `USER_AGENT` from requesting `endpoint_url`.
    ///
    /// Always `false` when no rules were loaded.
    pub fn is_restricted(&self, endpoint_url: &Url) -> bool {
        self.restrictions
            .as_ref()
            .is_some_and(|restrictions| !restrictions.allowed(endpoint_url.as_str()))
    }

    /// Wraps successfully parsed rules together with the client.
    fn new(client: reqwest::Client, restrictions: Restrictions) -> Self {
        Self {
            client,
            restrictions: Some(restrictions),
        }
    }

    /// Creates an instance that restricts nothing.
    fn empty(client: reqwest::Client) -> Self {
        Self {
            client,
            restrictions: None,
        }
    }

    /// Downloads and parses `/robots.txt` relative to `base_url`.
    ///
    /// Returns `Ok(None)` when no rules apply: an API token is present, the upstream is
    /// local, the server answers with a 4xx status other than 429, or the body cannot be
    /// parsed.
    ///
    /// # Errors
    ///
    /// * `NetworkFailure` - the request could not be built or sent.
    /// * `UnexpectedResponse` - the server answered with any other non-success status, or
    ///   the body could not be read as text.
    async fn _fetch<F: ForgeApi>(
        client: &reqwest::Client,
        api: &F,
        base_url: &Url,
    ) -> Result<Option<Restrictions>, error::Error> {
        if api.access_token_value().is_some() {
            println!(
                "Skipping robots.txt fetch; we have an API token, so we're assuming no restrictions."
            );
            return Ok(None);
        }
        if is_localhost(base_url) {
            println!(
                "Skipping robots.txt fetch; upstream is localhost, so we're assuming no restrictions."
            );
            return Ok(None);
        }

        // Host-less URLs were already filtered out by the localhost check above, so joining
        // an absolute path onto the remaining (hierarchical) base URL is expected to succeed.
        let uri = base_url.join("/robots.txt").expect("Valid URL path");
        println!("↗ GET HTTP {uri}");

        // `send` reports request-construction problems as an `Err` too, so there is no need
        // to build the request separately and panic when that fails.
        let response = match client.get(uri.as_str()).send().await {
            Ok(response) => response,
            Err(err) => {
                println!("Request failed due to error: {err}");
                return Err(error::Error::NetworkFailure);
            }
        };

        // Never feed an error page to the parser: its body is not a robots.txt file.
        let status = response.status();
        if status.is_client_error() && status != reqwest::StatusCode::TOO_MANY_REQUESTS {
            // A missing or inaccessible robots.txt means there are no crawl restrictions.
            println!("↘ No robots.txt available (HTTP {status}). Assuming unrestricted.");
            return Ok(None);
        }
        if !status.is_success() {
            println!("↘⚠️ Warning: Unexpected HTTP status {status} while fetching robots.txt.");
            return Err(error::Error::UnexpectedResponse);
        }

        let txt = match response.text().await {
            Ok(txt) => txt,
            Err(err) => {
                println!("Could not get text from response: {err}");
                return Err(error::Error::UnexpectedResponse);
            }
        };

        match Restrictions::new(USER_AGENT, txt.as_bytes()) {
            Ok(restrictions) => Ok(Some(restrictions)),
            Err(err) => {
                println!(
                    "↘⚠️ Warning: Could not parse robots.txt. Assuming unrestricted. Error: {err}"
                );
                Ok(None)
            }
        }
    }
}
fn is_localhost(url: &Url) -> bool {
url.domain().is_none_or(|domain| domain == "localhost")
}