git-gemini-forge 0.6.2

A simple Gemini server that serves a read-only view of public repositories from a Git forge.
use super::robotstxt::RobotsTxt;
use super::{ForgeApi, error, user_agent};
use arc_swap::ArcSwap;
use httpdate::parse_http_date;
use isahc::http::header::{self, USER_AGENT};
use isahc::http::{HeaderName, HeaderValue};
use isahc::{Body, Response};
use isahc::{http::StatusCode, prelude::*};
use secrecy::{ExposeSecret, SecretString};
use serde::de::DeserializeOwned;
use std::sync::Arc;
use std::time::{Duration, SystemTime};
use url::Url;

/// Describes a REST resource that yields data of type `Value` when requested.
///
/// `Api` is the forge API flavour the endpoint belongs to; `Value` is the type the
/// response body is deserialized into.
pub trait ApiEndpoint<Api: ForgeApi, Value: DeserializeOwned> {
	/// The address HTTP requests for this resource are sent to.
	fn url(&self) -> Url;

	/// The forge API this endpoint talks to.
	fn api(&self) -> &Api;
}

/// Manages network activities: it holds the upstream's robots.txt restrictions and
/// remembers when the upstream last asked us to back off.
///
/// NOTE(review): an earlier revision of this comment said that cloning this object
/// shares the same underlying data, but no `Clone` implementation is visible next to
/// this definition — confirm before relying on that.
pub struct Net {
	/// The upstream's robots.txt restrictions.
	restrictions: RobotsTxt,

	/// If we've been timed out (i.e. we've gotten an HTTP 429 error) then
	/// this value is the time after which we should be able to make requests again.
	/// `None` means no back-off deadline is currently recorded.
	///
	/// Kept in an [`ArcSwap`] so the value can be replaced through a shared `&self`.
	timed_out_until: ArcSwap<Option<SystemTime>>,
}

impl Net {
	/// Constructs a new [`Net`] instance that respects the given robots.txt restrictions.
	pub fn new(restrictions: RobotsTxt) -> Self {
		Self {
			restrictions,
			timed_out_until: ArcSwap::new(Arc::new(None)),
		}
	}

	/// Fetches data from the given API endpoint. Results in `Err`
	/// if the result fails for any reason.
	pub fn call<A, V>(&self, endpoint: &dyn ApiEndpoint<A, V>) -> Result<V, error::Error>
	where
		A: ForgeApi,
		V: DeserializeOwned,
	{
		// Make sure we're not waiting on timeout...
		if let Some(retry_after) = self.timed_out_until.load().as_ref() {
			return Err(error::Error::Throttled(Some(*retry_after)));
		}

		let auth_header = match endpoint.api().access_token_value() {
			None => None,
			Some(access_token) => Some((endpoint.api().access_token_header_name(), access_token)),
		};
		let mut response = self.get(&endpoint.url(), auth_header)?;
		let status = response.status();

		// No permission. We may need to provide some kind of token in a header somewhere
		if status == StatusCode::FORBIDDEN
			|| status == StatusCode::UNAUTHORIZED
			|| status == StatusCode::UNAVAILABLE_FOR_LEGAL_REASONS
		{
			return Err(error::Error::Unauthorized);
		}

		// No result found; don't attempt to deserialize, since the message shape may differ
		if status == StatusCode::NOT_FOUND {
			return Err(error::Error::ResourceNotFound);
		}

		if status == StatusCode::TOO_MANY_REQUESTS {
			// Parse Retry-After. If we got multiple of those, report the longest one.
			let retry_after: Option<SystemTime> = response
				.headers()
				.get_all(header::RETRY_AFTER)
				.iter()
				.flat_map(parse_retry_after)
				.max();
			// Cache and enforce this retry-after so we don't keep requesting, lol
			self.timed_out_until.store(Arc::new(retry_after));

			return Err(error::Error::Throttled(retry_after));
		}

		if !status.is_success() {
			return Err(error::Error::UnexpectedResponse);
		}

		match response.json() {
			Ok(obj) => Ok(obj),
			Err(err) => {
				let data = response
					.text()
					.map_err(|_| error::Error::UnexpectedResponse)?;
				println!("Could not parse JSON from response: {err}: {data}");
				Err(error::Error::UnexpectedResponse)
			}
		}
	}

	/// Fetches data from the given URL.
	fn get(
		&self,
		url: &Url,
		auth_header: Option<(HeaderName, SecretString)>,
	) -> Result<Response<Body>, error::Error> {
		if auth_header.is_some() {
			println!("↗ HTTP GET {url} (authenticated)");
		} else {
			println!("↗ HTTP GET {url}");
		}

		// Check upstream's robots.txt that we're allowed
		if self.restrictions.is_restricted(url) {
			return Err(error::Error::Restricted);
		}

		// GET URI
		let mut req = isahc::Request::get(url.as_str());

		// Apply auth header
		if let Some((key, value)) = auth_header {
			req = req.header(key, value.expose_secret());
		}
		req = req.header(USER_AGENT, user_agent());

		match isahc::send(req.body(()).unwrap()) {
			Err(err) => {
				println!("Request failed due to error: {err}");
				Err(error::Error::NetworkFailure)
			}
			Ok(response) => Ok(response),
		}
	}
}

/// Interprets a [`Retry-After`] header value as the point in time at which requests
/// should be permissible again.
///
/// The value may be either a whole number of seconds to wait (counted from now) or an
/// HTTP date. Returns [`None`] if the value is not visible ASCII, is neither of those
/// forms, names a date that has already passed, or describes a delay too large to
/// represent as a [`SystemTime`].
///
/// [`Retry-After`]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Retry-After
fn parse_retry_after(value: &HeaderValue) -> Option<SystemTime> {
	let raw = value.to_str().ok()?;

	match raw.parse::<u64>() {
		// A delay in seconds. `checked_add` yields `None` when the sum is out of
		// bounds, in which case we assume no restriction.
		Ok(delay_secs) => SystemTime::now().checked_add(Duration::from_secs(delay_secs)),
		// Not a number, so try it as a date; only a date that is not in the past counts.
		Err(_) => parse_http_date(raw)
			.ok()
			.filter(|deadline| *deadline >= SystemTime::now()),
	}
}

// MARK: - Tests

#[cfg(test)]
mod tests {
	use super::*;

	/// Wraps text that is known to be a legal header value.
	fn header(text: &str) -> HeaderValue {
		HeaderValue::from_str(text).unwrap()
	}

	/// A plain number of seconds resolves to that many seconds from now.
	#[test]
	fn test_retry_after_seconds() {
		let parsed = parse_retry_after(&header("4200")).expect("Should have parsed");
		let reference = SystemTime::now()
			.checked_add(Duration::from_secs(4200))
			.expect("Valid time");

		// Very closely accurate, since we got an exact value.
		let drift = reference.duration_since(parsed).unwrap();
		assert!(drift < Duration::from_millis(2));
	}

	/// A negative number is neither a valid delay nor a date.
	#[test]
	fn test_retry_after_negative_seconds() {
		let parsed = parse_retry_after(&header("-4200"));
		assert!(parsed.is_none());
	}

	/// An HTTP date in the future is returned as the deadline.
	#[test]
	fn test_retry_after_date() {
		let offset = Duration::from_secs(4200);
		let future = SystemTime::now().checked_add(offset).unwrap();
		let formatted = httpdate::fmt_http_date(future);
		let parsed = parse_retry_after(&header(&formatted)).expect("Should have parsed and be future");

		// Less accurate, since we need to parse a date.
		let drift = future.duration_since(parsed).unwrap();
		assert!(drift < Duration::from_secs(1));
	}

	/// An HTTP date in the past yields no deadline.
	#[test]
	fn test_retry_before_date() {
		let offset = Duration::from_secs(4200);
		let past = SystemTime::now().checked_sub(offset).unwrap();
		let formatted = httpdate::fmt_http_date(past);
		let actual = parse_retry_after(&header(&formatted));
		assert!(actual.is_none(), "expected None, got {:?}", actual);
	}
}