Skip to main content

webfinger_rs/types/
resource.rs

1use std::fmt;
2use std::str::FromStr;
3
4use http::Uri;
5
6/// Errors that can occur while parsing a WebFinger resource URI.
7#[non_exhaustive]
8#[derive(Debug, thiserror::Error)]
9pub enum ResourceError {
10    /// The resource is a relative reference instead of an absolute URI.
11    #[error("resource must be an absolute URI")]
12    RelativeReference,
13
14    /// The resource contains raw text outside the URI character set.
15    ///
16    /// Resource URI text must be ASCII and every byte must be allowed by RFC 3986 as an
17    /// `unreserved`, `reserved`, or percent-escape marker byte. Characters outside that set, such
18    /// as `{`, `|`, `^`, and non-ASCII code points, must be percent-encoded before parsing.
19    #[error("resource contains invalid URI characters")]
20    InvalidCharacters,
21
22    /// The resource contains a malformed percent escape.
23    #[error("resource contains invalid percent encoding")]
24    InvalidPercentEncoding,
25
26    /// The resource is an invalid HTTP or HTTPS URI.
27    #[error(transparent)]
28    InvalidHttpUri(#[from] http::uri::InvalidUri),
29
30    /// The resource is an HTTP or HTTPS URI without an authority.
31    #[error("HTTP and HTTPS resources must include an authority")]
32    MissingHttpAuthority,
33}
34
/// The URI that a WebFinger query asks about.
///
/// In RFC 7033 the query target travels in the `resource` query parameter and is a full URI, not
/// a relative reference. A `Resource` keeps that URI text after checking the parts of URI syntax
/// that this crate depends on at request boundaries.
///
/// # Validation
///
/// Validation is deliberately conservative. A value is accepted only when all of these hold:
///
/// - it begins with an RFC 3986 URI scheme;
/// - it consists solely of raw RFC 3986 URI characters;
/// - each `%` introduces a complete percent escape;
/// - any non-ASCII text has already been percent-encoded; and
/// - an `http` or `https` resource is written in the `//authority` form, which is required
///   before its host is made available through [`Resource::host`].
///
/// Typical valid values are `acct:carol@example.com` and `https://example.org/users/carol`.
///
/// # Examples
///
/// Parsing a well-formed `acct:` resource:
///
/// ```rust
/// use webfinger_rs::Resource;
///
/// let resource = "acct:carol@example.com".parse::<Resource>()?;
/// assert_eq!(resource.as_str(), "acct:carol@example.com");
/// # Ok::<(), webfinger_rs::ResourceError>(())
/// ```
///
/// Raw characters that are not URI characters are refused. Percent-encode them within the
/// resource URI itself before placing that URI into the outer WebFinger query string:
///
/// ```rust
/// use webfinger_rs::{Resource, ResourceError};
///
/// let error = "acct:carol{admin}@example.com"
///     .parse::<Resource>()
///     .unwrap_err();
/// assert!(matches!(error, ResourceError::InvalidCharacters));
///
/// let resource = "acct:carol%7Badmin%7D@example.com".parse::<Resource>()?;
/// assert_eq!(resource.as_str(), "acct:carol%7Badmin%7D@example.com");
/// # Ok::<(), webfinger_rs::ResourceError>(())
/// ```
///
/// An HTTP(S) resource has to include an authority, so that host inference never mistakes opaque
/// URI text for a host:
///
/// ```rust
/// use webfinger_rs::{Resource, ResourceError};
///
/// let error = "https:example.org/profile"
///     .parse::<Resource>()
///     .unwrap_err();
/// assert!(matches!(error, ResourceError::MissingHttpAuthority));
///
/// let resource = "https://example.org/profile".parse::<Resource>()?;
/// assert_eq!(resource.host(), Some("example.org"));
/// # Ok::<(), webfinger_rs::ResourceError>(())
/// ```
///
/// # References
///
/// - [RFC 7033 section 4.1]: the `resource` parameter
/// - [RFC 3986 section 2.1]: percent encoding
/// - [RFC 3986 section 2.2]: reserved characters
/// - [RFC 3986 section 2.3]: unreserved characters
/// - [RFC 3986 section 3.1]: URI schemes
/// - [RFC 3986 section 3.2]: authority
///
/// [RFC 7033 section 4.1]: https://www.rfc-editor.org/rfc/rfc7033.html#section-4.1
/// [RFC 3986 section 2.1]: https://www.rfc-editor.org/rfc/rfc3986.html#section-2.1
/// [RFC 3986 section 2.2]: https://www.rfc-editor.org/rfc/rfc3986.html#section-2.2
/// [RFC 3986 section 2.3]: https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3
/// [RFC 3986 section 3.1]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1
/// [RFC 3986 section 3.2]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2
// The derived comparison and hashing impls visit `text` first and `host` second, so the field
// order below is significant.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Resource {
    /// The resource URI text.
    text: String,
    /// The host associated with the resource, if one was recorded.
    host: Option<String>,
}
113
114impl Resource {
115    /// Returns the resource URI as a string slice.
116    pub fn as_str(&self) -> &str {
117        &self.text
118    }
119
120    /// Returns the resource as an [`http::Uri`] when it fits that representation.
121    ///
122    /// WebFinger resources can use schemes such as `acct:` that are valid URI strings but do not
123    /// expose a host through [`http::Uri`]. This accessor is mainly useful for hierarchical
124    /// resources such as `https://example.org/users/carol`.
125    pub fn uri(&self) -> Option<Uri> {
126        Uri::try_from(self.as_str()).ok()
127    }
128
129    /// Returns the host from the resource's [`http::Uri`] representation, when present.
130    ///
131    /// URI schemes such as `acct:` do not have a host in [`http::Uri`], so this returns `None` for
132    /// those resources.
133    pub fn host(&self) -> Option<&str> {
134        self.host.as_deref()
135    }
136}
137
138impl fmt::Display for Resource {
139    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140        f.write_str(&self.text)
141    }
142}
143
144impl AsRef<str> for Resource {
145    fn as_ref(&self) -> &str {
146        self.as_str()
147    }
148}
149
150impl FromStr for Resource {
151    type Err = ResourceError;
152
153    fn from_str(resource: &str) -> Result<Self, Self::Err> {
154        let host = validate_resource(resource)?;
155        Ok(Self {
156            text: resource.to_string(),
157            host,
158        })
159    }
160}
161
162impl TryFrom<String> for Resource {
163    type Error = ResourceError;
164
165    fn try_from(resource: String) -> Result<Self, Self::Error> {
166        let host = validate_resource(&resource)?;
167        Ok(Self {
168            text: resource,
169            host,
170        })
171    }
172}
173
174impl TryFrom<&str> for Resource {
175    type Error = ResourceError;
176
177    fn try_from(resource: &str) -> Result<Self, Self::Error> {
178        resource.parse()
179    }
180}
181
182fn validate_resource(resource: &str) -> Result<Option<String>, ResourceError> {
183    let Some(scheme) = scheme(resource) else {
184        return Err(ResourceError::RelativeReference);
185    };
186    if !resource.is_ascii() {
187        return Err(ResourceError::InvalidCharacters);
188    }
189    validate_uri_characters(resource)?;
190    validate_percent_escapes(resource)?;
191    if scheme.eq_ignore_ascii_case("http") || scheme.eq_ignore_ascii_case("https") {
192        // WebFinger only needs host inference for hierarchical HTTP(S) resources. RFC 3986
193        // section 3.2 attaches an authority to URIs that begin their hier-part with `//`; opaque
194        // forms like `http:foo` must not produce a synthetic host.
195        if !resource[scheme.len()..].starts_with("://") {
196            return Err(ResourceError::MissingHttpAuthority);
197        }
198        let uri = Uri::try_from(resource).map_err(ResourceError::InvalidHttpUri)?;
199        let Some(host) = uri.host() else {
200            return Err(ResourceError::MissingHttpAuthority);
201        };
202        return Ok(Some(host.to_string()));
203    }
204    Ok(None)
205}
206
207fn validate_percent_escapes(resource: &str) -> Result<(), ResourceError> {
208    let mut bytes = resource.as_bytes().iter();
209    while let Some(byte) = bytes.next() {
210        if *byte != b'%' {
211            continue;
212        }
213        let Some(high) = bytes.next() else {
214            return Err(ResourceError::InvalidPercentEncoding);
215        };
216        let Some(low) = bytes.next() else {
217            return Err(ResourceError::InvalidPercentEncoding);
218        };
219        if !high.is_ascii_hexdigit() || !low.is_ascii_hexdigit() {
220            return Err(ResourceError::InvalidPercentEncoding);
221        }
222    }
223    Ok(())
224}
225
226fn validate_uri_characters(resource: &str) -> Result<(), ResourceError> {
227    if resource.bytes().all(is_uri_character) {
228        Ok(())
229    } else {
230        Err(ResourceError::InvalidCharacters)
231    }
232}
233
/// Reports whether `byte` may appear raw in a resource URI.
///
/// Accepted bytes are ASCII letters and digits, the remaining RFC 3986 `unreserved` marks
/// (`-._~`), the `reserved` delimiters (`:/?#[]@` and `!$&'()*+,;=`), and `%`, which introduces a
/// percent escape.
fn is_uri_character(byte: u8) -> bool {
    const PUNCTUATION: &[u8] = b"-._~:/?#[]@!$&'()*+,;=%";

    byte.is_ascii_alphanumeric() || PUNCTUATION.contains(&byte)
}
265
/// Extracts the URI scheme that `resource` starts with, without the trailing `:`.
///
/// The text before the first `:` counts as a scheme only when it starts with an ASCII letter and
/// continues with ASCII letters, digits, `+`, `-`, or `.`. Returns `None` when there is no `:`,
/// when the text before it is empty, or when it contains any other byte (including `/`, `?`,
/// and `#`).
fn scheme(resource: &str) -> Option<&str> {
    let (candidate, _rest) = resource.split_once(':')?;

    let mut remaining = candidate.bytes();
    let leading = remaining.next()?;
    let is_scheme = leading.is_ascii_alphabetic()
        && remaining.all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'-' | b'.'));

    is_scheme.then_some(candidate)
}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `text`, panicking if the resource is rejected.
    fn accepted(text: &str) -> Resource {
        text.parse::<Resource>().unwrap()
    }

    /// Parses `text`, panicking if the resource is accepted, and returns the resulting error.
    fn rejected(text: &str) -> ResourceError {
        text.parse::<Resource>().unwrap_err()
    }

    /// `acct:` resources are absolute URIs with a scheme and must parse.
    #[test]
    fn accepts_acct_resource() {
        let text = "acct:carol@example.com";

        assert_eq!(accepted(text).as_str(), text);
    }

    /// Hierarchical HTTPS resources with an authority parse and expose their host.
    #[test]
    fn accepts_https_resource() {
        let text = "https://example.org/users/carol";
        let resource = accepted(text);

        assert_eq!(resource.as_str(), text);
        assert_eq!(resource.host(), Some("example.org"));
    }

    /// Scheme-specific, opaque-looking URIs parse.
    ///
    /// The RFC 3986 `URI` production needs a scheme but permits a scheme-specific path with no
    /// authority; this is the shape WebFinger commonly uses for `acct:` resources.
    #[test]
    fn accepts_scheme_specific_resource() {
        let text = "urn:example:animal:ferret:nose";

        assert_eq!(accepted(text).as_str(), text);
    }

    /// Relative references are refused even though `http::Uri` can otherwise parse them.
    #[test]
    fn rejects_relative_resource_references() {
        let relative_references = ["carol", "/relative", "../x", ""];

        for resource in relative_references {
            let error = rejected(resource);

            assert!(
                matches!(error, ResourceError::RelativeReference),
                "expected relative-resource error for {resource:?}, got {error:?}",
            );
        }
    }

    /// Raw non-ASCII resource text is refused.
    ///
    /// RFC 3986 URI syntax is ASCII, so non-ASCII data has to be percent-encoded inside the
    /// resource URI itself before it goes into the WebFinger query parameter.
    #[test]
    fn rejects_non_ascii_resource_text() {
        let error = rejected("acct:carolé@example.org");

        assert!(
            matches!(error, ResourceError::InvalidCharacters),
            "expected invalid-character error, got {error:?}",
        );
    }

    /// Raw ASCII characters that are not RFC 3986 URI characters are refused.
    #[test]
    fn rejects_invalid_raw_uri_characters() {
        let invalid_resources = [
            "acct:carol{bad}@example.org",
            "acct:carol|bad@example.org",
            "acct:carol^bad@example.org",
            "acct:carol`bad@example.org",
        ];

        for resource in invalid_resources {
            let error = rejected(resource);

            assert!(
                matches!(error, ResourceError::InvalidCharacters),
                "expected invalid-character error for {resource:?}, got {error:?}",
            );
        }
    }

    /// Characters outside the raw URI character set are fine once percent-encoded.
    #[test]
    fn accepts_percent_encoded_invalid_raw_characters() {
        let text = "acct:carol%7Bbad%7D@example.org";

        assert_eq!(accepted(text).as_str(), text);
    }

    /// Malformed percent escape syntax inside a resource URI is refused.
    ///
    /// Once the outer WebFinger query has been decoded, percent escapes belong to the resource
    /// URI itself, so malformed ones have to be rejected at the resource boundary as well.
    #[test]
    fn rejects_malformed_resource_percent_escape() {
        let error = rejected("acct:carol%GG@example.org");

        assert!(
            matches!(error, ResourceError::InvalidPercentEncoding),
            "expected invalid-percent-encoding error, got {error:?}",
        );
    }

    /// HTTP and HTTPS resources that leave out the required authority are refused.
    #[test]
    fn rejects_http_resources_without_authority() {
        let without_authority = ["http:foo", "https:foo", "http:/example.org/path"];

        for resource in without_authority {
            let error = rejected(resource);

            assert!(
                matches!(error, ResourceError::MissingHttpAuthority),
                "expected missing-authority error for {resource:?}, got {error:?}",
            );
        }
    }

    /// HTTP and HTTPS authorities are validated whatever the letter case of the scheme.
    ///
    /// URI schemes are case-insensitive, so an uppercase `HTTPS` must not slip past the stricter
    /// hierarchical URI validation applied to HTTP resources.
    #[test]
    fn rejects_invalid_https_authority_with_uppercase_scheme() {
        let error = rejected("HTTPS://[::1");

        assert!(
            matches!(error, ResourceError::InvalidHttpUri(_)),
            "expected invalid-authority error, got {error:?}",
        );
    }
}
415}