// use_canonical/lib.rs

1#![forbid(unsafe_code)]
2#![doc = include_str!("../README.md")]
3
4use core::{fmt, str::FromStr};
5use std::error::Error;
6
/// Reports whether `value` looks like an absolute HTTP or HTTPS URL.
///
/// The check is deliberately shallow: the text must begin with `http://` or
/// `https://` (ASCII case-insensitive) and contain at least one `.` anywhere.
/// The URL is not parsed or validated beyond that.
fn is_http_url(value: &str) -> bool {
    // Compare the leading bytes directly instead of lowercasing (and heap-allocating)
    // the whole input; working on bytes also avoids UTF-8 char-boundary concerns.
    let bytes = value.as_bytes();
    let has_http_scheme = ["https://", "http://"].iter().any(|scheme| {
        bytes
            .get(..scheme.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme.as_bytes()))
    });
    has_http_scheme && value.contains('.')
}
11
12fn validate_url(
13    value: impl AsRef<str>,
14    field: &'static str,
15) -> Result<String, CanonicalValueError> {
16    let trimmed = value.as_ref().trim();
17    if trimmed.is_empty() {
18        return Err(CanonicalValueError::Empty { field });
19    }
20    if is_http_url(trimmed) {
21        Ok(trimmed.to_string())
22    } else {
23        Err(CanonicalValueError::InvalidUrl)
24    }
25}
26
/// Error returned by canonical primitive constructors.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CanonicalValueError {
    /// The supplied value was empty after trimming whitespace.
    Empty {
        /// Human-readable name of the field that was empty; it prefixes the message.
        field: &'static str,
    },
    /// The URL did not look like an HTTP or HTTPS URL.
    InvalidUrl,
    /// The hreflang tag was unsupported.
    InvalidHreflang,
}

impl fmt::Display for CanonicalValueError {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Empty { field } => write!(formatter, "{field} cannot be empty"),
            // URL validation rejects a missing scheme *and* a missing `.`, so the
            // message names both requirements rather than only the scheme.
            Self::InvalidUrl => formatter
                .write_str("URL must start with http:// or https:// and contain a '.'"),
            Self::InvalidHreflang => formatter.write_str("hreflang tag shape is unsupported"),
        }
    }
}

impl Error for CanonicalValueError {}
49
50/// A canonical URL label.
51#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
52pub struct CanonicalUrl(String);
53
54impl CanonicalUrl {
55    /// Creates a canonical URL.
56    ///
57    /// # Errors
58    ///
59    /// Returns [`CanonicalValueError`] when the URL is empty or unsupported.
60    pub fn new(value: impl AsRef<str>) -> Result<Self, CanonicalValueError> {
61        validate_url(value, "canonical URL").map(Self)
62    }
63
64    /// Returns the URL string.
65    #[must_use]
66    pub fn as_str(&self) -> &str {
67        &self.0
68    }
69}
70
71impl AsRef<str> for CanonicalUrl {
72    fn as_ref(&self) -> &str {
73        self.as_str()
74    }
75}
76
77impl fmt::Display for CanonicalUrl {
78    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
79        formatter.write_str(self.as_str())
80    }
81}
82
83impl FromStr for CanonicalUrl {
84    type Err = CanonicalValueError;
85
86    fn from_str(value: &str) -> Result<Self, Self::Err> {
87        Self::new(value)
88    }
89}
90
91/// A hreflang tag for alternate URLs.
92#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
93pub struct HreflangTag(String);
94
95impl HreflangTag {
96    /// Creates a normalized hreflang tag.
97    ///
98    /// # Errors
99    ///
100    /// Returns [`CanonicalValueError::InvalidHreflang`] when the shape is unsupported.
101    pub fn new(value: impl AsRef<str>) -> Result<Self, CanonicalValueError> {
102        let trimmed = value.as_ref().trim();
103        if trimmed.eq_ignore_ascii_case("x-default") {
104            return Ok(Self("x-default".to_string()));
105        }
106
107        let parts = trimmed.split('-').collect::<Vec<_>>();
108        let valid_language = parts.first().is_some_and(|part| {
109            (2..=3).contains(&part.len()) && part.bytes().all(|b| b.is_ascii_alphabetic())
110        });
111        let valid_region = parts.get(1).is_none_or(|part| {
112            (part.len() == 2 && part.bytes().all(|b| b.is_ascii_alphabetic()))
113                || (part.len() == 3 && part.bytes().all(|b| b.is_ascii_digit()))
114        });
115
116        if parts.len() <= 2 && valid_language && valid_region {
117            let mut normalized = parts[0].to_ascii_lowercase();
118            if let Some(region) = parts.get(1) {
119                normalized.push('-');
120                normalized.push_str(&region.to_ascii_uppercase());
121            }
122            Ok(Self(normalized))
123        } else {
124            Err(CanonicalValueError::InvalidHreflang)
125        }
126    }
127
128    /// Returns the normalized hreflang tag.
129    #[must_use]
130    pub fn as_str(&self) -> &str {
131        &self.0
132    }
133}
134
135impl AsRef<str> for HreflangTag {
136    fn as_ref(&self) -> &str {
137        self.as_str()
138    }
139}
140
141/// An alternate URL with an optional hreflang tag.
142#[derive(Clone, Debug, Eq, PartialEq)]
143pub struct AlternateUrl {
144    url: String,
145    hreflang: Option<HreflangTag>,
146}
147
148impl AlternateUrl {
149    /// Creates an alternate URL.
150    ///
151    /// # Errors
152    ///
153    /// Returns [`CanonicalValueError`] when the URL is empty or unsupported.
154    pub fn new(value: impl AsRef<str>) -> Result<Self, CanonicalValueError> {
155        Ok(Self {
156            url: validate_url(value, "alternate URL")?,
157            hreflang: None,
158        })
159    }
160
161    /// Sets the hreflang tag.
162    #[must_use]
163    pub fn with_hreflang(mut self, tag: HreflangTag) -> Self {
164        self.hreflang = Some(tag);
165        self
166    }
167
168    /// Returns the URL string.
169    #[must_use]
170    pub fn as_str(&self) -> &str {
171        &self.url
172    }
173
174    /// Returns the optional hreflang tag.
175    #[must_use]
176    pub const fn hreflang(&self) -> Option<&HreflangTag> {
177        self.hreflang.as_ref()
178    }
179}
180
/// Redirect relationship kind.
///
/// The derived ordering follows declaration order, so reordering variants
/// changes comparison results.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum RedirectKind {
    /// Permanent redirect.
    Permanent,
    /// Temporary redirect.
    Temporary,
    /// See-other redirect.
    SeeOther,
    /// Gone surface.
    Gone,
}
193
/// Duplicate surface hint for canonical grouping.
///
/// The derived ordering follows declaration order, so reordering variants
/// changes comparison results.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum DuplicateSurfaceHint {
    /// Query parameters can duplicate content.
    QueryParameters,
    /// Trailing slash variants can duplicate content.
    TrailingSlash,
    /// HTTP and HTTPS variants can duplicate content.
    HttpHttps,
    /// `www` and apex host variants can duplicate content.
    WwwNonWww,
    /// Locale variants exist for the same surface.
    LocaleVariant,
    /// Print page variant.
    PrintPage,
    /// Syndicated copy of a surface.
    SyndicatedCopy,
}
212
213/// A canonical URL with alternates and duplicate-surface hints.
214#[derive(Clone, Debug, Eq, PartialEq)]
215pub struct CanonicalGroup {
216    canonical: CanonicalUrl,
217    alternates: Vec<AlternateUrl>,
218    hints: Vec<DuplicateSurfaceHint>,
219}
220
221impl CanonicalGroup {
222    /// Creates a canonical group.
223    #[must_use]
224    pub const fn new(canonical: CanonicalUrl) -> Self {
225        Self {
226            canonical,
227            alternates: Vec::new(),
228            hints: Vec::new(),
229        }
230    }
231
232    /// Adds an alternate URL.
233    #[must_use]
234    pub fn with_alternate(mut self, alternate: AlternateUrl) -> Self {
235        self.alternates.push(alternate);
236        self
237    }
238
239    /// Adds a duplicate-surface hint.
240    #[must_use]
241    pub fn with_hint(mut self, hint: DuplicateSurfaceHint) -> Self {
242        self.hints.push(hint);
243        self
244    }
245
246    /// Returns the canonical URL.
247    #[must_use]
248    pub const fn canonical(&self) -> &CanonicalUrl {
249        &self.canonical
250    }
251
252    /// Returns alternate URLs.
253    #[must_use]
254    pub fn alternates(&self) -> &[AlternateUrl] {
255        &self.alternates
256    }
257
258    /// Returns duplicate-surface hints.
259    #[must_use]
260    pub fn hints(&self) -> &[DuplicateSurfaceHint] {
261        &self.hints
262    }
263}
264
#[cfg(test)]
mod tests {
    use super::{AlternateUrl, CanonicalGroup, CanonicalUrl, DuplicateSurfaceHint, HreflangTag};

    /// Absolute HTTP(S) URLs are accepted; blank, relative and other-scheme input is not.
    #[test]
    fn validates_canonical_urls() {
        assert!(CanonicalUrl::new("https://example.com/").is_ok());
        assert_eq!(
            CanonicalUrl::new("  https://example.com/  ").unwrap().as_str(),
            "https://example.com/"
        );
        assert!(CanonicalUrl::new("/relative").is_err());
        assert!(CanonicalUrl::new("   ").is_err());
        assert!(CanonicalUrl::new("ftp://example.com/").is_err());
    }

    /// Tags are case-normalized and unsupported shapes are rejected.
    #[test]
    fn normalizes_hreflang_tags() {
        assert_eq!(HreflangTag::new("EN-us").unwrap().as_str(), "en-US");
        assert_eq!(HreflangTag::new("x-default").unwrap().as_str(), "x-default");
        assert_eq!(HreflangTag::new("es-419").unwrap().as_str(), "es-419");
        assert!(HreflangTag::new("too-many-parts").is_err());
        assert!(HreflangTag::new("").is_err());
        assert!(HreflangTag::new("en-").is_err());
    }

    /// The builder keeps the canonical URL and records alternates and hints.
    #[test]
    fn builds_canonical_groups() {
        let group = CanonicalGroup::new(CanonicalUrl::new("https://example.com/en/").unwrap())
            .with_alternate(
                AlternateUrl::new("https://example.com/es/")
                    .unwrap()
                    .with_hreflang(HreflangTag::new("es").unwrap()),
            )
            .with_hint(DuplicateSurfaceHint::LocaleVariant);

        assert_eq!(group.canonical().as_str(), "https://example.com/en/");
        assert_eq!(group.alternates().len(), 1);
        assert_eq!(group.alternates()[0].as_str(), "https://example.com/es/");
        assert_eq!(
            group.alternates()[0].hreflang().map(HreflangTag::as_str),
            Some("es")
        );
        assert_eq!(group.hints(), &[DuplicateSurfaceHint::LocaleVariant]);
    }
}