#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]
use core::{fmt, str::FromStr};
use std::error::Error;
/// Returns `true` when `value` begins with an `http://` or `https://` scheme
/// (compared ASCII case-insensitively) and contains at least one `.`.
///
/// This is a cheap shape check, not a URL parse: the dot may appear anywhere
/// in the string, including the path or query rather than the host.
fn is_http_url(value: &str) -> bool {
    // Compare only the leading bytes instead of lowercasing (and allocating a
    // copy of) the whole input just to look at its scheme.
    let has_scheme = |scheme: &str| {
        value
            .as_bytes()
            .get(..scheme.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme.as_bytes()))
    };
    (has_scheme("https://") || has_scheme("http://")) && value.contains('.')
}
/// Trims `value` and checks that the result passes `is_http_url`.
///
/// `field` names the value being validated; it is only carried into the
/// [`CanonicalValueError::Empty`] error.
///
/// # Errors
///
/// - [`CanonicalValueError::Empty`] when the trimmed input is empty.
/// - [`CanonicalValueError::InvalidUrl`] when `is_http_url` rejects the
///   trimmed input.
fn validate_url(
    value: impl AsRef<str>,
    field: &'static str,
) -> Result<String, CanonicalValueError> {
    match value.as_ref().trim() {
        "" => Err(CanonicalValueError::Empty { field }),
        candidate if is_http_url(candidate) => Ok(candidate.to_owned()),
        _ => Err(CanonicalValueError::InvalidUrl),
    }
}
/// Reasons a value is rejected by the validating constructors in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CanonicalValueError {
    /// The input was empty after trimming surrounding whitespace.
    Empty {
        /// Human-readable name of the value that was empty; it is
        /// interpolated into the `Display` message.
        field: &'static str,
    },
    /// The input did not pass the HTTP(S) URL shape check.
    InvalidUrl,
    /// The input is not an hreflang tag shape accepted by this file.
    InvalidHreflang,
}

impl fmt::Display for CanonicalValueError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only `Empty` needs interpolation; the other variants are fixed text.
        let message = match *self {
            Self::Empty { field } => return write!(formatter, "{field} cannot be empty"),
            Self::InvalidUrl => "URL must start with http:// or https://",
            Self::InvalidHreflang => "hreflang tag shape is unsupported",
        };
        formatter.write_str(message)
    }
}

impl Error for CanonicalValueError {}
/// URL text that has been trimmed and accepted by `validate_url`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CanonicalUrl(String);

impl CanonicalUrl {
    /// Validates `value` and wraps the trimmed text.
    ///
    /// # Errors
    ///
    /// Propagates the [`CanonicalValueError`] reported by `validate_url`,
    /// using `"canonical URL"` as the field name.
    pub fn new(value: impl AsRef<str>) -> Result<Self, CanonicalValueError> {
        let url = validate_url(value, "canonical URL")?;
        Ok(Self(url))
    }

    /// Borrows the stored URL text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl AsRef<str> for CanonicalUrl {
    /// Borrows the stored URL text.
    fn as_ref(&self) -> &str {
        &self.0
    }
}

impl fmt::Display for CanonicalUrl {
    /// Writes the stored URL text verbatim.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str(&self.0)
    }
}

impl FromStr for CanonicalUrl {
    type Err = CanonicalValueError;

    /// Parses by delegating to [`CanonicalUrl::new`].
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        CanonicalUrl::new(value)
    }
}
/// A normalized hreflang tag: `x-default`, a bare language (`en`), or a
/// language plus region (`en-US`, `es-419`).
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct HreflangTag(String);

impl HreflangTag {
    /// Parses and normalizes an hreflang tag.
    ///
    /// Surrounding whitespace is trimmed first. Accepted shapes are:
    ///
    /// - `x-default`, matched ASCII case-insensitively and stored lowercase;
    /// - a language subtag of 2–3 ASCII letters, stored lowercase;
    /// - that language followed by `-` and a region of either 2 ASCII letters
    ///   or 3 ASCII digits, with the region stored uppercase.
    ///
    /// # Errors
    ///
    /// Returns [`CanonicalValueError::InvalidHreflang`] for anything else,
    /// including empty input and tags with more than two subtags.
    pub fn new(value: impl AsRef<str>) -> Result<Self, CanonicalValueError> {
        let trimmed = value.as_ref().trim();
        if trimmed.eq_ignore_ascii_case("x-default") {
            return Ok(Self("x-default".to_string()));
        }

        // Walk the subtags lazily instead of collecting them into a `Vec`;
        // only the first two are inspected, a third just marks the tag invalid.
        let mut subtags = trimmed.split('-');
        // `split` always yields at least one (possibly empty) piece.
        let language = subtags.next().unwrap_or_default();
        let region = subtags.next();
        let has_extra_subtags = subtags.next().is_some();

        let valid_language = (2..=3).contains(&language.len())
            && language.bytes().all(|b| b.is_ascii_alphabetic());
        // A missing region is fine; a present one must match one of two shapes.
        let valid_region = region.is_none_or(|part| {
            (part.len() == 2 && part.bytes().all(|b| b.is_ascii_alphabetic()))
                || (part.len() == 3 && part.bytes().all(|b| b.is_ascii_digit()))
        });
        if has_extra_subtags || !valid_language || !valid_region {
            return Err(CanonicalValueError::InvalidHreflang);
        }

        let mut normalized = language.to_ascii_lowercase();
        if let Some(region) = region {
            normalized.push('-');
            normalized.push_str(&region.to_ascii_uppercase());
        }
        Ok(Self(normalized))
    }

    /// Borrows the normalized tag text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl AsRef<str> for HreflangTag {
    /// Borrows the normalized tag text.
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
/// An alternate URL, optionally annotated with an hreflang tag.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AlternateUrl {
    /// Trimmed URL text accepted by `validate_url`.
    url: String,
    /// Optional hreflang annotation; `None` until `with_hreflang` sets it.
    hreflang: Option<HreflangTag>,
}

impl AlternateUrl {
    /// Validates `value` and creates an alternate with no hreflang tag.
    ///
    /// # Errors
    ///
    /// Propagates the [`CanonicalValueError`] reported by `validate_url`,
    /// using `"alternate URL"` as the field name.
    pub fn new(value: impl AsRef<str>) -> Result<Self, CanonicalValueError> {
        validate_url(value, "alternate URL").map(|url| Self {
            url,
            hreflang: None,
        })
    }

    /// Returns this alternate with its hreflang tag set to `tag`, replacing
    /// any tag that was already present.
    #[must_use]
    pub fn with_hreflang(self, tag: HreflangTag) -> Self {
        Self {
            hreflang: Some(tag),
            ..self
        }
    }

    /// Borrows the stored URL text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.url.as_str()
    }

    /// Borrows the hreflang tag, if one has been set.
    #[must_use]
    pub const fn hreflang(&self) -> Option<&HreflangTag> {
        match self.hreflang {
            Some(ref tag) => Some(tag),
            None => None,
        }
    }
}
/// Classification of a redirect.
///
/// The derived ordering follows declaration order, so reordering variants
/// changes comparison results.
// NOTE(review): nothing in the visible file uses this type; the variant names
// suggest HTTP redirect/status classes — confirm against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RedirectKind {
    /// Permanent redirect.
    Permanent,
    /// Temporary redirect.
    Temporary,
    /// "See other" redirect.
    SeeOther,
    /// Target is gone.
    Gone,
}
/// Hint attached to a [`CanonicalGroup`] via `with_hint`.
///
/// The derived ordering follows declaration order, so reordering variants
/// changes comparison results.
// NOTE(review): the per-variant descriptions below are inferred from the
// variant names only — confirm against the crate's intended semantics.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DuplicateSurfaceHint {
    /// Presumably: URLs that differ by query parameters.
    QueryParameters,
    /// Presumably: URLs that differ by a trailing slash.
    TrailingSlash,
    /// Presumably: `http` and `https` variants of the same URL.
    HttpHttps,
    /// Presumably: `www` and non-`www` variants of the same host.
    WwwNonWww,
    /// Presumably: locale-specific variants of a page.
    LocaleVariant,
    /// Presumably: a print version of a page.
    PrintPage,
    /// Presumably: a syndicated copy of the content.
    SyndicatedCopy,
}
/// A canonical URL together with its alternates and duplicate-surface hints.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CanonicalGroup {
    /// The canonical URL the group is built around.
    canonical: CanonicalUrl,
    /// Alternates in the order they were added.
    alternates: Vec<AlternateUrl>,
    /// Hints in the order they were added; duplicates are not removed.
    hints: Vec<DuplicateSurfaceHint>,
}

impl CanonicalGroup {
    /// Creates a group for `canonical` with no alternates and no hints.
    #[must_use]
    pub const fn new(canonical: CanonicalUrl) -> Self {
        Self {
            canonical,
            alternates: Vec::new(),
            hints: Vec::new(),
        }
    }

    /// Returns the group with `alternate` appended to its alternates.
    #[must_use]
    pub fn with_alternate(self, alternate: AlternateUrl) -> Self {
        let Self {
            canonical,
            mut alternates,
            hints,
        } = self;
        alternates.push(alternate);
        Self {
            canonical,
            alternates,
            hints,
        }
    }

    /// Returns the group with `hint` appended to its hints.
    #[must_use]
    pub fn with_hint(self, hint: DuplicateSurfaceHint) -> Self {
        let Self {
            canonical,
            alternates,
            mut hints,
        } = self;
        hints.push(hint);
        Self {
            canonical,
            alternates,
            hints,
        }
    }

    /// Borrows the canonical URL.
    #[must_use]
    pub const fn canonical(&self) -> &CanonicalUrl {
        let Self { canonical, .. } = self;
        canonical
    }

    /// Borrows the alternates in insertion order.
    #[must_use]
    pub fn alternates(&self) -> &[AlternateUrl] {
        self.alternates.as_slice()
    }

    /// Borrows the hints in insertion order.
    #[must_use]
    pub fn hints(&self) -> &[DuplicateSurfaceHint] {
        self.hints.as_slice()
    }
}
#[cfg(test)]
mod tests {
    use super::{AlternateUrl, CanonicalGroup, CanonicalUrl, DuplicateSurfaceHint, HreflangTag};

    /// An absolute HTTPS URL is accepted; a relative path is rejected.
    #[test]
    fn validates_canonical_urls() {
        let absolute = CanonicalUrl::new("https://example.com/");
        assert!(absolute.is_ok());

        let relative = CanonicalUrl::new("/relative");
        assert!(relative.is_err());
    }

    /// Language is lowercased, region uppercased, `x-default` passes through,
    /// and tags with more than two subtags are rejected.
    #[test]
    fn normalizes_hreflang_tags() {
        for (raw, expected) in [("EN-us", "en-US"), ("x-default", "x-default")] {
            assert_eq!(HreflangTag::new(raw).unwrap().as_str(), expected);
        }

        let oversized = HreflangTag::new("too-many-parts");
        assert!(oversized.is_err());
    }

    /// The builder methods accumulate alternates and hints on the group.
    #[test]
    fn builds_canonical_groups() {
        let canonical = CanonicalUrl::new("https://example.com/en/").unwrap();
        let spanish = AlternateUrl::new("https://example.com/es/")
            .unwrap()
            .with_hreflang(HreflangTag::new("es").unwrap());

        let group = CanonicalGroup::new(canonical)
            .with_alternate(spanish)
            .with_hint(DuplicateSurfaceHint::LocaleVariant);

        assert_eq!(group.alternates().len(), 1);
        assert_eq!(group.hints(), &[DuplicateSurfaceHint::LocaleVariant]);
    }
}