webfinger_rs/types/resource.rs
1use std::fmt;
2use std::str::FromStr;
3
4use http::Uri;
5
/// Errors that can occur while parsing a WebFinger resource URI.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate that matches on it needs a
/// wildcard arm.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum ResourceError {
    /// The resource is a relative reference instead of an absolute URI.
    ///
    /// Reported when the text does not begin with an RFC 3986 scheme (a letter followed by
    /// letters, digits, `+`, `-`, or `.`) terminated by `:`.
    #[error("resource must be an absolute URI")]
    RelativeReference,

    /// The resource contains raw text outside the URI character set.
    ///
    /// Resource URI text must be ASCII and every byte must be allowed by RFC 3986 as an
    /// `unreserved`, `reserved`, or percent-escape marker byte. Characters outside that set, such
    /// as `{`, `|`, `^`, and non-ASCII code points, must be percent-encoded before parsing.
    #[error("resource contains invalid URI characters")]
    InvalidCharacters,

    /// The resource contains a malformed percent escape.
    ///
    /// Every `%` must be followed by exactly two ASCII hexadecimal digits; a `%` that is cut
    /// short by the end of the text is reported the same way.
    #[error("resource contains invalid percent encoding")]
    InvalidPercentEncoding,

    /// The resource is an invalid HTTP or HTTPS URI.
    ///
    /// Wraps the error returned by [`http::Uri`] parsing; the display text is forwarded from the
    /// wrapped error.
    #[error(transparent)]
    InvalidHttpUri(#[from] http::uri::InvalidUri),

    /// The resource is an HTTP or HTTPS URI without an authority.
    ///
    /// Reported when the scheme is not followed by `://`, or when the parsed URI exposes no
    /// host.
    #[error("HTTP and HTTPS resources must include an authority")]
    MissingHttpAuthority,
}
34
/// A WebFinger resource URI.
///
/// RFC 7033 uses the `resource` query parameter for the query target, which is a URI rather than a
/// relative reference. `Resource` stores that URI text after checking the URI syntax that this crate
/// relies on at request boundaries.
///
/// Validation is intentionally conservative:
///
/// - the value must start with an RFC 3986 URI scheme;
/// - the value must contain only raw RFC 3986 URI characters;
/// - every `%` must start a complete percent escape;
/// - raw non-ASCII text must already be percent-encoded; and
/// - `http` and `https` resources must use the `//authority` form before their host is exposed
///   through [`Resource::host`].
///
/// Common valid resources include `acct:carol@example.com` and
/// `https://example.org/users/carol`.
///
/// # Examples
///
/// Parse a valid `acct:` resource:
///
/// ```rust
/// use webfinger_rs::Resource;
///
/// let resource = "acct:carol@example.com".parse::<Resource>()?;
/// assert_eq!(resource.as_str(), "acct:carol@example.com");
/// # Ok::<(), webfinger_rs::ResourceError>(())
/// ```
///
/// Raw characters outside the URI character set are rejected. Percent-encode them inside the
/// resource URI before putting that URI in the outer WebFinger query string:
///
/// ```rust
/// use webfinger_rs::{Resource, ResourceError};
///
/// let error = "acct:carol{admin}@example.com"
///     .parse::<Resource>()
///     .unwrap_err();
/// assert!(matches!(error, ResourceError::InvalidCharacters));
///
/// let resource = "acct:carol%7Badmin%7D@example.com".parse::<Resource>()?;
/// assert_eq!(resource.as_str(), "acct:carol%7Badmin%7D@example.com");
/// # Ok::<(), webfinger_rs::ResourceError>(())
/// ```
///
/// HTTP(S) resources must include an authority so host inference cannot treat opaque URI text as a
/// host:
///
/// ```rust
/// use webfinger_rs::{Resource, ResourceError};
///
/// let error = "https:example.org/profile"
///     .parse::<Resource>()
///     .unwrap_err();
/// assert!(matches!(error, ResourceError::MissingHttpAuthority));
///
/// let resource = "https://example.org/profile".parse::<Resource>()?;
/// assert_eq!(resource.host(), Some("example.org"));
/// # Ok::<(), webfinger_rs::ResourceError>(())
/// ```
///
/// See [RFC 7033 section 4.1] for the `resource` parameter, [RFC 3986 section 2.1] for percent
/// encoding, [RFC 3986 section 2.2] for reserved characters, [RFC 3986 section 2.3] for
/// unreserved characters, [RFC 3986 section 3.1] for URI schemes, and [RFC 3986 section 3.2] for
/// authority.
///
/// [RFC 7033 section 4.1]: https://www.rfc-editor.org/rfc/rfc7033.html#section-4.1
/// [RFC 3986 section 2.1]: https://www.rfc-editor.org/rfc/rfc3986.html#section-2.1
/// [RFC 3986 section 2.2]: https://www.rfc-editor.org/rfc/rfc3986.html#section-2.2
/// [RFC 3986 section 2.3]: https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3
/// [RFC 3986 section 3.1]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1
/// [RFC 3986 section 3.2]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Resource {
    /// The resource URI text exactly as it was supplied, stored only after validation succeeds.
    text: String,
    /// The host captured during validation. It is only populated for `http` and `https`
    /// resources; every other scheme stores `None`.
    host: Option<String>,
}
113
114impl Resource {
115 /// Returns the resource URI as a string slice.
116 pub fn as_str(&self) -> &str {
117 &self.text
118 }
119
120 /// Returns the resource as an [`http::Uri`] when it fits that representation.
121 ///
122 /// WebFinger resources can use schemes such as `acct:` that are valid URI strings but do not
123 /// expose a host through [`http::Uri`]. This accessor is mainly useful for hierarchical
124 /// resources such as `https://example.org/users/carol`.
125 pub fn uri(&self) -> Option<Uri> {
126 Uri::try_from(self.as_str()).ok()
127 }
128
129 /// Returns the host from the resource's [`http::Uri`] representation, when present.
130 ///
131 /// URI schemes such as `acct:` do not have a host in [`http::Uri`], so this returns `None` for
132 /// those resources.
133 pub fn host(&self) -> Option<&str> {
134 self.host.as_deref()
135 }
136}
137
138impl fmt::Display for Resource {
139 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140 f.write_str(&self.text)
141 }
142}
143
144impl AsRef<str> for Resource {
145 fn as_ref(&self) -> &str {
146 self.as_str()
147 }
148}
149
150impl FromStr for Resource {
151 type Err = ResourceError;
152
153 fn from_str(resource: &str) -> Result<Self, Self::Err> {
154 let host = validate_resource(resource)?;
155 Ok(Self {
156 text: resource.to_string(),
157 host,
158 })
159 }
160}
161
162impl TryFrom<String> for Resource {
163 type Error = ResourceError;
164
165 fn try_from(resource: String) -> Result<Self, Self::Error> {
166 let host = validate_resource(&resource)?;
167 Ok(Self {
168 text: resource,
169 host,
170 })
171 }
172}
173
174impl TryFrom<&str> for Resource {
175 type Error = ResourceError;
176
177 fn try_from(resource: &str) -> Result<Self, Self::Error> {
178 resource.parse()
179 }
180}
181
182fn validate_resource(resource: &str) -> Result<Option<String>, ResourceError> {
183 let Some(scheme) = scheme(resource) else {
184 return Err(ResourceError::RelativeReference);
185 };
186 if !resource.is_ascii() {
187 return Err(ResourceError::InvalidCharacters);
188 }
189 validate_uri_characters(resource)?;
190 validate_percent_escapes(resource)?;
191 if scheme.eq_ignore_ascii_case("http") || scheme.eq_ignore_ascii_case("https") {
192 // WebFinger only needs host inference for hierarchical HTTP(S) resources. RFC 3986
193 // section 3.2 attaches an authority to URIs that begin their hier-part with `//`; opaque
194 // forms like `http:foo` must not produce a synthetic host.
195 if !resource[scheme.len()..].starts_with("://") {
196 return Err(ResourceError::MissingHttpAuthority);
197 }
198 let uri = Uri::try_from(resource).map_err(ResourceError::InvalidHttpUri)?;
199 let Some(host) = uri.host() else {
200 return Err(ResourceError::MissingHttpAuthority);
201 };
202 return Ok(Some(host.to_string()));
203 }
204 Ok(None)
205}
206
207fn validate_percent_escapes(resource: &str) -> Result<(), ResourceError> {
208 let mut bytes = resource.as_bytes().iter();
209 while let Some(byte) = bytes.next() {
210 if *byte != b'%' {
211 continue;
212 }
213 let Some(high) = bytes.next() else {
214 return Err(ResourceError::InvalidPercentEncoding);
215 };
216 let Some(low) = bytes.next() else {
217 return Err(ResourceError::InvalidPercentEncoding);
218 };
219 if !high.is_ascii_hexdigit() || !low.is_ascii_hexdigit() {
220 return Err(ResourceError::InvalidPercentEncoding);
221 }
222 }
223 Ok(())
224}
225
226fn validate_uri_characters(resource: &str) -> Result<(), ResourceError> {
227 if resource.bytes().all(is_uri_character) {
228 Ok(())
229 } else {
230 Err(ResourceError::InvalidCharacters)
231 }
232}
233
/// Reports whether `byte` may appear raw in URI text.
///
/// The accepted set is ASCII letters and digits, the remaining RFC 3986 `unreserved` marks, the
/// `gen-delims` and `sub-delims` that make up `reserved`, and `%` as the percent-escape marker.
/// Every other byte, including all bytes above the ASCII range, is rejected.
fn is_uri_character(byte: u8) -> bool {
    // RFC 3986 section 2.3 `unreserved`, minus the letters and digits handled separately.
    const UNRESERVED_MARKS: &[u8] = b"-._~";
    // RFC 3986 section 2.2 `gen-delims`.
    const GEN_DELIMS: &[u8] = b":/?#[]@";
    // RFC 3986 section 2.2 `sub-delims`.
    const SUB_DELIMS: &[u8] = b"!$&'()*+,;=";

    byte.is_ascii_alphanumeric()
        || byte == b'%'
        || UNRESERVED_MARKS.contains(&byte)
        || GEN_DELIMS.contains(&byte)
        || SUB_DELIMS.contains(&byte)
}
265
/// Extracts the URI scheme that `resource` starts with, without the trailing `:`.
///
/// A scheme is an ASCII letter followed by any number of ASCII letters, digits, `+`, `-`, or
/// `.`, and it ends at the first `:`. Returns `None` when the text contains no `:` or when the
/// text before the first `:` is not a well-formed scheme, which is the case for relative
/// references such as `carol`, `/relative`, or `../x`.
fn scheme(resource: &str) -> Option<&str> {
    // Only the text before the first `:` can be a scheme.
    let (candidate, _rest) = resource.split_once(':')?;

    let mut bytes = candidate.bytes();
    // An empty candidate (text starting with `:`) has no leading letter and is rejected here.
    let leading = bytes.next()?;
    let well_formed = leading.is_ascii_alphabetic()
        && bytes.all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'-' | b'.'));

    well_formed.then_some(candidate)
}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Accepts `acct:` resources because they are absolute URIs with a scheme.
    #[test]
    fn accepts_acct_resource() {
        let resource = "acct:carol@example.com".parse::<Resource>().unwrap();

        assert_eq!(resource.as_str(), "acct:carol@example.com");
    }

    /// Accepts hierarchical HTTPS resources with an authority.
    #[test]
    fn accepts_https_resource() {
        let resource = "https://example.org/users/carol"
            .parse::<Resource>()
            .unwrap();

        assert_eq!(resource.as_str(), "https://example.org/users/carol");
        assert_eq!(resource.host(), Some("example.org"));
    }

    /// Accepts scheme-specific opaque-looking URIs.
    ///
    /// RFC 3986's `URI` production requires a scheme but allows a scheme-specific path without an
    /// authority. WebFinger commonly uses this shape for `acct:` resources.
    #[test]
    fn accepts_scheme_specific_resource() {
        let resource = "urn:example:animal:ferret:nose"
            .parse::<Resource>()
            .unwrap();

        assert_eq!(resource.as_str(), "urn:example:animal:ferret:nose");
    }

    /// Exposes no host for resources outside the HTTP(S) schemes.
    ///
    /// Host inference is limited to `http` and `https`, so other schemes report `None` even when
    /// they use the hierarchical `//authority` form.
    #[test]
    fn exposes_no_host_for_non_http_resources() {
        for resource in [
            "acct:carol@example.com",
            "urn:example:animal:ferret:nose",
            "ftp://example.org/file",
        ] {
            let parsed = resource.parse::<Resource>().unwrap();

            assert_eq!(parsed.host(), None, "expected no host for {resource:?}");
        }
    }

    /// Keeps every conversion entry point and text accessor in agreement.
    #[test]
    fn conversions_agree() {
        let text = "https://example.org/users/carol";
        let parsed = text.parse::<Resource>().unwrap();

        assert_eq!(Resource::try_from(text).unwrap(), parsed);
        assert_eq!(Resource::try_from(text.to_string()).unwrap(), parsed);
        assert_eq!(parsed.to_string(), text);

        let borrowed: &str = parsed.as_ref();
        assert_eq!(borrowed, text);
    }

    /// Rejects relative references that `http::Uri` can otherwise parse.
    #[test]
    fn rejects_relative_resource_references() {
        for resource in ["carol", "/relative", "../x", ""] {
            let error = resource.parse::<Resource>().unwrap_err();

            assert!(
                matches!(error, ResourceError::RelativeReference),
                "expected relative-resource error for {resource:?}, got {error:?}",
            );
        }
    }

    /// Rejects raw non-ASCII resource text.
    ///
    /// RFC 3986 URI syntax is ASCII. Non-ASCII data must be percent-encoded inside the resource URI
    /// itself before it is put into the WebFinger query parameter.
    #[test]
    fn rejects_non_ascii_resource_text() {
        let error = "acct:carolé@example.org".parse::<Resource>().unwrap_err();

        assert!(
            matches!(error, ResourceError::InvalidCharacters),
            "expected invalid-character error, got {error:?}",
        );
    }

    /// Rejects raw ASCII characters outside the RFC 3986 URI character set.
    #[test]
    fn rejects_invalid_raw_uri_characters() {
        for resource in [
            "acct:carol{bad}@example.org",
            "acct:carol|bad@example.org",
            "acct:carol^bad@example.org",
            "acct:carol`bad@example.org",
        ] {
            let error = resource.parse::<Resource>().unwrap_err();

            assert!(
                matches!(error, ResourceError::InvalidCharacters),
                "expected invalid-character error for {resource:?}, got {error:?}",
            );
        }
    }

    /// Accepts characters outside the raw URI character set when they are percent-encoded.
    #[test]
    fn accepts_percent_encoded_invalid_raw_characters() {
        let resource = "acct:carol%7Bbad%7D@example.org"
            .parse::<Resource>()
            .unwrap();

        assert_eq!(resource.as_str(), "acct:carol%7Bbad%7D@example.org");
    }

    /// Rejects malformed percent escape syntax inside resource URIs.
    ///
    /// Percent escapes belong to the resource URI itself after the outer WebFinger query has been
    /// decoded, so malformed escapes must be rejected at the resource boundary too.
    #[test]
    fn rejects_malformed_resource_percent_escape() {
        let error = "acct:carol%GG@example.org".parse::<Resource>().unwrap_err();

        assert!(
            matches!(error, ResourceError::InvalidPercentEncoding),
            "expected invalid-percent-encoding error, got {error:?}",
        );
    }

    /// Rejects percent escapes that are cut short or only partly hexadecimal.
    ///
    /// A `%` at the very end of the text, or one followed by a single digit, exercises the
    /// early-exit paths of the escape check rather than the hex-digit comparison.
    #[test]
    fn rejects_truncated_resource_percent_escape() {
        for resource in ["acct:carol%", "acct:carol%4", "acct:carol%4G", "acct:carol%%41"] {
            let error = resource.parse::<Resource>().unwrap_err();

            assert!(
                matches!(error, ResourceError::InvalidPercentEncoding),
                "expected invalid-percent-encoding error for {resource:?}, got {error:?}",
            );
        }
    }

    /// Rejects HTTP and HTTPS resources that omit the required authority.
    #[test]
    fn rejects_http_resources_without_authority() {
        for resource in ["http:foo", "https:foo", "http:/example.org/path"] {
            let error = resource.parse::<Resource>().unwrap_err();

            assert!(
                matches!(error, ResourceError::MissingHttpAuthority),
                "expected missing-authority error for {resource:?}, got {error:?}",
            );
        }
    }

    /// Validates HTTP and HTTPS resource authorities regardless of scheme case.
    ///
    /// URI schemes are case-insensitive, so uppercase `HTTPS` should not bypass the stricter
    /// hierarchical URI validation used for HTTP resources.
    #[test]
    fn rejects_invalid_https_authority_with_uppercase_scheme() {
        let error = "HTTPS://[::1".parse::<Resource>().unwrap_err();

        assert!(
            matches!(error, ResourceError::InvalidHttpUri(_)),
            "expected invalid-authority error, got {error:?}",
        );
    }
}
415}