gload 0.5.1

A command line client for the Gemini protocol.
Documentation
//! This software is licensed as described in the file LICENSE, which
//! you should have received as part of this distribution.
//!
//! You may opt to use, copy, modify, merge, publish, distribute and/or sell
//! copies of the Software, and permit persons to whom the Software is
//! furnished to do so, under the terms of the LICENSE file.
//!
//! This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
//! KIND, either express or implied.
//!
//! SPDX-License-Identifier: BSD-3-Clause

use core::str::FromStr;
use url::Url;

// TODO: request!() macro that ensures at compile time that a request is valid, maybe even in `const` areas

/// A Gemini protocol request.
///
/// Wraps a [`Url`] that has passed the checks in [`Request::new`]: the scheme is `gemini`,
/// there is no userinfo, a host is present, the fragment has been removed, and the
/// serialized URI is at most 1024 bytes long.
///
/// # Example
/// ```rust
/// # use gload::Request;
/// let req = Request::from_uri_string("gemini://example.com").unwrap();
/// assert_eq!(req.as_bytes(), b"gemini://example.com/\r\n");
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Request(Url);

impl Request {
	/// Parses `absolute_uri` and builds a [`Request`] from it.
	///
	/// The string is first parsed as-is. If that fails specifically because the input looks
	/// like a relative URL (no scheme), it is parsed a second time with `gemini://` prepended.
	/// The resulting URL is then handed to [`Request::new`] for validation and normalization.
	///
	/// # Errors
	///
	/// Returns [`RequestConstructError::UrlParse`] if the string cannot be parsed (with or
	/// without the assumed scheme), or any error produced by [`Request::new`]: a scheme other
	/// than `gemini`, a nonempty userinfo portion, a missing host, or a URI longer than
	/// 1024 bytes.
	pub fn from_uri_string<S: AsRef<str>>(absolute_uri: S) -> Result<Self, RequestConstructError> {
		let raw = absolute_uri.as_ref();

		let parsed = Url::parse(raw).or_else(|err| match err {
			// No scheme was given: retry with the default scheme in front.
			url::ParseError::RelativeUrlWithoutBase => {
				Url::parse(&format!("{GEMINI_SCHEME}://{raw}"))
			}
			other => Err(other),
		});

		parsed
			.map_err(RequestConstructError::UrlParse)
			.and_then(Self::new)
	}

	/// Validates and normalizes `absolute_uri` into a [`Request`].
	///
	/// Normalization applied before the URI is stored:
	/// - any fragment is removed,
	/// - an empty path becomes `/`,
	/// - an explicit port equal to the default (1965) is dropped.
	///
	/// # Errors
	///
	/// The checks run in this order and the first failure is returned:
	/// - [`RequestConstructError::UnsupportedProtocol`] if the scheme isn't `gemini`,
	/// - [`RequestConstructError::Userinfo`] if the URI has a nonempty username or a password,
	/// - [`RequestConstructError::MissingAuthority`] if the URI has no host,
	/// - [`RequestConstructError::RequestTooLongError`] if the normalized URI is longer than
	///   1024 bytes.
	pub fn new(absolute_uri: Url) -> Result<Self, RequestConstructError> {
		let mut uri = absolute_uri;

		// Spec: "the scheme used is "gemini""
		let scheme = uri.scheme();
		if scheme != GEMINI_SCHEME {
			return Err(RequestConstructError::UnsupportedProtocol(scheme.into()));
		}

		// Spec: "the userinfo portion of a URI MUST NOT be used;"
		if !uri.username().is_empty() || uri.password().is_some() {
			return Err(RequestConstructError::Userinfo);
		}

		// We always expect the URI to go somewhere
		if !uri.has_host() {
			return Err(RequestConstructError::MissingAuthority);
		}

		// Spec: "Clients MUST NOT send a fragment as part of the request"
		uri.set_fragment(None);

		// Spec: "If a client is making a request with an empty path, the client SHOULD add a trailing '/' to the request"
		if uri.path().is_empty() {
			uri.set_path("/");
		}

		// Omit the port if it is already default
		if uri.port() == Some(GEMINI_PORT) {
			uri.set_port(None).expect("setting None always succeeds");
		}

		// Spec: "When making a request, the URI MUST NOT exceed 1024 bytes"
		// `str::len` already counts bytes, so no detour through `as_bytes` is needed.
		// The length is measured after normalization, i.e. on what will actually be sent.
		let len = uri.as_str().len();
		if len > URI_LIMIT {
			return Err(RequestConstructError::RequestTooLongError(len));
		}

		Ok(Self(uri))
	}

	/// Builds a request for `path` on the same host as `self`.
	///
	/// An empty `path` yields a clone of `self`. Otherwise everything from the first `#`
	/// onward is discarded, the remainder is joined onto the current URI path and normalized,
	/// and a trailing `/` is restored when the input ended with one.
	///
	/// # Errors
	///
	/// Returns `Err` if the joined path can't be parsed as a URI, or if the resulting URI
	/// fails validation in [`Request::new`] (for example, it is more than 1024 bytes long).
	pub(crate) fn with_new_path(&self, path: &str) -> Result<Self, RequestConstructError> {
		use normalize_path::NormalizePath;
		use std::path::PathBuf;

		// Nothing to resolve: keep the current request as it is.
		if path.is_empty() {
			return Ok(self.clone());
		}

		// Keep only the part before the first '#'; fragments are never sent.
		let relative = path.split_once('#').map_or(path, |(before, _)| before);

		// Remembered up front so the trailing slash can be restored after normalization.
		// NOTE(review): this is checked before any `?query` is separated, so a query that
		// ends in '/' also counts — confirm that is intended.
		let wants_trailing_slash = relative.ends_with('/');

		// NOTE(review): joining goes through `PathBuf`, so the separator used here is
		// presumably platform-dependent — verify behavior on non-Unix targets.
		let joined = PathBuf::from(self.0.path()).join(relative).normalize();
		let joined = joined.to_str().expect("path is utf-8");
		let mut target = self
			.0
			.join(joined)
			.map_err(RequestConstructError::UrlParse)?;

		if wants_trailing_slash && !target.path().ends_with('/') {
			let with_slash = format!("{}/", target.path());
			target.set_path(&with_slash);
		}

		Self::new(target)
	}

	/// The host where the request is meant to go.
	///
	/// # Panics
	///
	/// Panics if the wrapped URI has no host. [`Request::new`] rejects such URIs, so this
	/// indicates a broken invariant rather than bad input.
	#[inline]
	pub(crate) fn host(&self) -> url::Host<&str> {
		self.0.host().expect("constructor made sure host exists")
	}

	/// The host port where the request is meant to go.
	///
	/// Returns the port stored in the URI, or the Gemini default (1965) when the URI carries
	/// no explicit port.
	#[inline]
	pub(crate) fn port(&self) -> u16 {
		// Spec: "the port defaults to 1965 if not specified;"
		self.0.port().unwrap_or(GEMINI_PORT)
	}

	/// Builds the bytes to send to the server: the serialized URI followed by CRLF.
	///
	/// A new buffer is allocated on every call.
	pub fn as_bytes(&self) -> Vec<u8> {
		// Spec: "The client connects to the server and sends a request which consists of an absolute URI followed by a CR (character 13) and LF (character 10)."
		let uri = self.0.as_str().as_bytes();

		let mut payload = Vec::with_capacity(uri.len() + CRLF.len());
		payload.extend_from_slice(uri);
		payload.extend_from_slice(&CRLF);
		payload
	}
}

#[cfg(not(tarpaulin_include))]
impl core::fmt::Display for Request {
	/// Writes the request URI as stored, without the trailing CRLF.
	#[inline]
	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
		f.write_str(self.0.as_str())
	}
}

/// Carriage return byte. Spec: "CR (character 13)"
pub(crate) const CR: u8 = b'\r';

/// Line feed byte. Spec: "LF (character 10)"
pub(crate) const LF: u8 = b'\n';

/// Request terminator. Spec: "a CR (character 13) and LF (character 10)"
const CRLF: [u8; 2] = [CR, LF];

/// Port used when a URI names none. Spec: "the port defaults to 1965 if not specified;"
const GEMINI_PORT: u16 = 1965;

/// The only accepted URI scheme. Spec: "the scheme used is "gemini""
///
/// Declared `const` like the other protocol constants here, which also allows it to be
/// used in patterns.
const GEMINI_SCHEME: &str = "gemini";

/// Maximum URI length in bytes. Spec: "When making a request, the URI MUST NOT exceed 1024 bytes"
const URI_LIMIT: usize = 1024;

/// Parses a [`Request`] from a string by delegating to [`Request::from_uri_string`],
/// so `"...".parse::<Request>()` accepts the same inputs and fails the same way.
impl FromStr for Request {
	type Err = RequestConstructError;

	#[inline]
	fn from_str(s: &str) -> Result<Self, Self::Err> {
		Self::from_uri_string(s)
	}
}

/// Converts a string slice into a [`Request`] using the same rules as
/// [`Request::from_uri_string`].
impl TryFrom<&str> for Request {
	type Error = RequestConstructError;

	#[inline]
	fn try_from(value: &str) -> Result<Self, Self::Error> {
		Self::from_uri_string(value)
	}
}

/// Converts an owned string into a [`Request`] using the same rules as
/// [`Request::from_uri_string`].
impl TryFrom<String> for Request {
	type Error = RequestConstructError;

	#[inline]
	fn try_from(value: String) -> Result<Self, Self::Error> {
		Self::from_uri_string(value)
	}
}

/// Converts an already-parsed [`Url`] into a [`Request`] by delegating to [`Request::new`],
/// which performs all validation and normalization.
impl TryFrom<Url> for Request {
	type Error = RequestConstructError;

	#[inline]
	fn try_from(value: Url) -> Result<Self, Self::Error> {
		Self::new(value)
	}
}

/// Indicates a problem constructing the request.
///
/// Returned by [`Request::new`], [`Request::from_uri_string`], and the `FromStr`/`TryFrom`
/// conversions built on them.
#[derive(Debug)]
pub enum RequestConstructError {
	/// The URI did not have a host portion.
	MissingAuthority,

	/// The UTF-8 encoded URI exceeded 1024 bytes.
	///
	/// The payload is the byte length of the URI after normalization.
	RequestTooLongError(usize),

	/// An unsupported URI scheme was provided. gload only supports `gemini` links.
	///
	/// The payload is the scheme that was found on the URI.
	UnsupportedProtocol(String),

	/// The given string could not be parsed as a URI.
	///
	/// The payload is the underlying parser error.
	UrlParse(url::ParseError),

	/// The URI included a userinfo portion.
	Userinfo,
}

#[cfg(not(tarpaulin_include))]
impl core::fmt::Display for RequestConstructError {
	/// Writes a one-line, human-readable description of the error.
	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
		match self {
			// Fixed messages need no formatting machinery.
			Self::MissingAuthority => f.write_str("the request URI did not have a Host portion"),
			Self::Userinfo => f.write_str("the request URI contained userinfo data"),

			// Messages that carry a payload.
			Self::UnsupportedProtocol(scheme) => {
				write!(f, "protocol '{scheme}' is not supported")
			}
			Self::UrlParse(err) => write!(f, "could not parse string as URI: {err}"),
			Self::RequestTooLongError(len) => write!(
				f,
				"the request URI was too long: expected {URI_LIMIT} bytes or fewer, but got {len}"
			),
		}
	}
}

// Uses the trait's default methods only; the wrapped `url::ParseError` is already included
// in the `Display` text of the `UrlParse` variant.
impl core::error::Error for RequestConstructError {}

// MARK: - Tests

#[cfg(test)]
mod tests {
	use super::*;
	use url::Url;
	use url_static::url;

	/// Valid URLs produce the expected payload, host, and port.
	#[test]
	fn test_constructs_request() {
		// Each case: (input URL, expected request bytes, expected port).
		#[rustfmt::skip]
		let cases = [
			(url!("gemini://localhost"), b"gemini://localhost/\r\n".as_ref(), 1965), // always includes CRLF
			(url!("gemini://localhost:1965"), b"gemini://localhost/\r\n", 1965), // omits default port
			(url!("gemini://localhost:1964"), b"gemini://localhost:1964/\r\n", 1964), // includes non-default port
			(url!("gemini://localhost:80"), b"gemini://localhost:80/\r\n", 80), // same for non-gemini known port
			(url!("gemini://:@localhost"), b"gemini://localhost/\r\n", 1965), // implicitly removes empty userinfo
		];
		for (absolute_uri, expected, port) in cases {
			let req = Request::new(absolute_uri).unwrap();
			assert_eq!(req.port(), port);
			assert_eq!(req.host(), url::Host::Domain("localhost"));

			let result = req.as_bytes();
			assert_eq!(result, expected);
		}
	}

	/// A URL with a scheme but no host is rejected with `MissingAuthority`.
	#[test]
	fn test_fails_for_missing_host() {
		let url_str = String::from("gemini://"); // also exercises the TryFrom constructor
		let result = Request::try_from(url_str);
		assert!(
			matches!(result, Err(RequestConstructError::MissingAuthority)),
			"{result:?}"
		);
	}

	/// Any nonempty username or password is rejected with `Userinfo`.
	#[test]
	fn test_fails_for_userinfo_url() {
		let cases = [
			url!("gemini://foo:bar@localhost"),
			url!("gemini://foo:@localhost"),
			url!("gemini://:bar@localhost"),
		];
		for absolute_uri in cases {
			let result = Request::new(absolute_uri);
			assert!(
				matches!(result, Err(RequestConstructError::Userinfo)),
				"{result:?}"
			);
		}
	}

	/// Schemes other than `gemini` are rejected, and the error carries the scheme.
	#[test]
	fn test_fails_for_non_gemini_protocol() {
		// Each case: (input URL, scheme expected in the error). The expected scheme is
		// lowercase even when the input is mixed-case.
		let cases = [
			(url!("foo://bar"), "foo"),
			(url!("Foo://bar"), "foo"),
			(url!("bar://"), "bar"),
			(url!("nope:"), "nope"),
			(url!("ReallyNo:"), "reallyno"),
		];

		for (absolute_uri, proto) in cases {
			let result = Request::new(absolute_uri);
			assert!(
				matches!(
					&result,
					Err(RequestConstructError::UnsupportedProtocol(p)) if p == proto
				),
				"{result:?}"
			);
		}
	}

	/// URIs that normalize to 1025 bytes are rejected, and the error reports that length.
	#[test]
	fn test_fails_for_too_long_uri() {
		// `{:A<N}` right-pads the URL with 'A' up to N characters. The third case is two
		// characters wider because the empty userinfo (":@") is dropped during parsing.
		let cases = [
			Url::parse(&format!("{:A<1025}", "gemini://localhost/A")).unwrap(),
			Url::parse(&format!("{:A<1025}", "gemini://localhost:1964/A")).unwrap(),
			Url::parse(&format!("{:A<1027}", "gemini://:@localhost/A")).unwrap(),
		];
		for absolute_uri in cases {
			let result = Request::try_from(absolute_uri).err().unwrap();
			assert!(
				matches!(result, RequestConstructError::RequestTooLongError(1025)),
				"{result:?}"
			);
		}
	}

	/// A bare host name gets the `gemini://` scheme prepended.
	#[test]
	fn test_adds_gemini_protocol_if_none_given() {
		let result = Request::from_uri_string("localhost").unwrap();
		assert_eq!(result.as_bytes(), b"gemini://localhost/\r\n");
	}

	/// Input that is still unparseable after prepending the scheme reports the parse error.
	#[test]
	fn test_fails_to_parse_bad_string_even_with_assumed_protocol() {
		let result = Request::from_uri_string("this is not a url").err().unwrap();
		assert!(
			matches!(
				result,
				RequestConstructError::UrlParse(url::ParseError::InvalidDomainCharacter)
			),
			"{result:?}"
		);
	}

	/// URIs of exactly 1024 bytes (after normalization) are accepted.
	#[test]
	fn test_for_exact_length_uri() {
		// Widths are chosen so every case normalizes to exactly 1024 bytes: ":@" adds two
		// characters that are dropped, and a missing "gemini://" prefix adds nine.
		let cases = [
			format!("{:A<1024}", "gemini://localhost/A"),
			format!("{:A<1024}", "gemini://localhost:1964/A"),
			format!("{:A<1026}", "gemini://:@localhost/A"),
			// Same as above, but with default scheme:
			format!("{:A<1015}", "localhost/A"),
			format!("{:A<1017}", ":@localhost/A"),
		];
		for absolute_uri in cases {
			let payload = Request::from_str(&absolute_uri).unwrap().as_bytes();
			assert_eq!(payload.len(), URI_LIMIT + CRLF.len());
		}
	}

	/// URIs one byte under the limit are accepted.
	#[test]
	fn test_for_right_length_uri() {
		// Same cases as the exact-length test, each one character shorter.
		let cases = [
			format!("{:A<1023}", "gemini://localhost/A"),
			format!("{:A<1023}", "gemini://localhost:1964/A"),
			format!("{:A<1025}", "gemini://:@localhost/A"),
			// Same as above, but with default scheme:
			format!("{:A<1014}", "localhost/A"),
			format!("{:A<1016}", ":@localhost/A"),
		];
		for absolute_uri in cases {
			let payload = Request::from_str(&absolute_uri).unwrap().as_bytes();
			assert_eq!(payload.len(), URI_LIMIT + CRLF.len() - 1);
		}
	}
}