lychee_lib/types/uri/
valid.rs

1use std::{convert::TryFrom, fmt::Display, net::IpAddr};
2
3use email_address::EmailAddress;
4use ip_network::Ipv6Network;
5use serde::{Deserialize, Serialize};
6use url::Url;
7
8use crate::{ErrorKind, Result};
9
10use super::raw::RawUri;
11
12/// Lychee's own representation of a URI, which encapsulates all supported
13/// formats.
14///
15/// If the scheme is `mailto`, it's a mail address.
16/// Otherwise it's treated as a website URL.
17#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)]
18pub struct Uri {
19    /// Website URL or mail address
20    pub(crate) url: Url,
21}
22
23impl Uri {
24    /// Returns the string representation of the `Uri`.
25    #[inline]
26    #[must_use]
27    pub fn as_str(&self) -> &str {
28        self.url.as_ref()
29    }
30
31    #[inline]
32    #[must_use]
33    /// Returns the scheme of the URI (e.g. `http` or `mailto`)
34    pub fn scheme(&self) -> &str {
35        self.url.scheme()
36    }
37
38    #[inline]
39    /// Changes this URL's scheme.
40    pub(crate) fn set_scheme(&mut self, scheme: &str) -> std::result::Result<(), ()> {
41        self.url.set_scheme(scheme)
42    }
43
44    #[inline]
45    #[must_use]
46    /// Returns the domain of the URI (e.g. `example.com`)
47    pub fn domain(&self) -> Option<&str> {
48        self.url.domain()
49    }
50
51    #[inline]
52    #[must_use]
53    /// Returns the path of the URI (e.g. `/path/to/resource`)
54    pub fn path(&self) -> &str {
55        self.url.path()
56    }
57
58    #[inline]
59    #[must_use]
60    /// Unless this URL is cannot-be-a-base,
61    /// return an iterator of '/' slash-separated path segments,
62    /// each as a percent-encoded ASCII string.
63    ///
64    /// Return `None` for cannot-be-a-base URLs.
65    pub fn path_segments(&self) -> Option<std::str::Split<'_, char>> {
66        self.url.path_segments()
67    }
68
69    #[must_use]
70    /// Returns the IP address (either IPv4 or IPv6) of the URI,
71    /// or `None` if it is a domain
72    pub fn host_ip(&self) -> Option<IpAddr> {
73        match self.url.host()? {
74            url::Host::Domain(_) => None,
75            url::Host::Ipv4(v4_addr) => Some(v4_addr.into()),
76            url::Host::Ipv6(v6_addr) => Some(v6_addr.into()),
77        }
78    }
79
80    /// Create a new URI with a `https` scheme
81    pub(crate) fn to_https(&self) -> Result<Uri> {
82        let mut https_uri = self.clone();
83        https_uri
84            .set_scheme("https")
85            .map_err(|()| ErrorKind::InvalidURI(self.clone()))?;
86        Ok(https_uri)
87    }
88
89    #[inline]
90    #[must_use]
91    /// Check if the URI is a valid mail address
92    pub fn is_mail(&self) -> bool {
93        self.scheme() == "mailto"
94    }
95
96    #[inline]
97    #[must_use]
98    /// Check if the URI is a tel
99    pub fn is_tel(&self) -> bool {
100        self.scheme() == "tel"
101    }
102
103    #[inline]
104    #[must_use]
105    /// Check if the URI is a file
106    pub fn is_file(&self) -> bool {
107        self.scheme() == "file"
108    }
109
110    #[inline]
111    #[must_use]
112    /// Check if the URI is a `data` URI
113    pub fn is_data(&self) -> bool {
114        self.scheme() == "data"
115    }
116
117    #[inline]
118    #[must_use]
119    /// Returns `true` if this is a loopback address.
120    ///
121    /// ## IPv4
122    ///
123    /// This is a loopback address (`127.0.0.0/8`).
124    ///
125    /// This property is defined by [IETF RFC 1122].
126    ///
127    /// ## IPv6
128    ///
129    /// This is the loopback address (`::1`), as defined in [IETF RFC 4291 section 2.5.3].
130    ///
131    /// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
132    /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
133    pub fn is_loopback(&self) -> bool {
134        match self.url.host() {
135            Some(url::Host::Ipv4(addr)) => addr.is_loopback(),
136            Some(url::Host::Ipv6(addr)) => addr.is_loopback(),
137            _ => false,
138        }
139    }
140
141    #[inline]
142    #[must_use]
143    /// Returns `true` if this is a private IPv4 address, a unique local IPv6 address (`fc00::/7`).
144    ///
145    /// # IPv4
146    ///
147    /// The private address ranges are defined in [IETF RFC 1918] and include:
148    ///
149    ///  - `10.0.0.0/8`
150    ///  - `172.16.0.0/12`
151    ///  - `192.168.0.0/16`
152    ///
153    /// # IPv6
154    ///
155    /// Unique local address is defined in [IETF RFC 4193].
156    ///
157    /// ## Note
158    ///
159    /// Unicast site-local network was defined in [IETF RFC 4291], but was fully deprecated in
160    /// [IETF RFC 3879]. So it is **NOT** considered as private on this purpose.
161    ///
162    /// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918
163    /// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193
164    /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
165    /// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879
166    pub fn is_private(&self) -> bool {
167        match self.url.host() {
168            Some(url::Host::Ipv4(addr)) => addr.is_private(),
169            Some(url::Host::Ipv6(addr)) => Ipv6Network::from(addr).is_unique_local(),
170            _ => false,
171        }
172    }
173
174    #[inline]
175    #[must_use]
176    /// Returns `true` if the address is a link-local IPv4 address (`169.254.0.0/16`),
177    /// or an IPv6 unicast address with link-local scope (`fe80::/10`).
178    ///
179    /// # IPv4
180    ///
181    /// Link-local address is defined by [IETF RFC 3927].
182    ///
183    /// # IPv6
184    ///
185    /// Unicast address with link-local scope is defined in [IETF RFC 4291].
186    ///
187    /// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927
188    /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
189    pub fn is_link_local(&self) -> bool {
190        match self.url.host() {
191            Some(url::Host::Ipv4(addr)) => addr.is_link_local(),
192            Some(url::Host::Ipv6(addr)) => Ipv6Network::from(addr).is_unicast_link_local(),
193            _ => false,
194        }
195    }
196}
197
198impl AsRef<str> for Uri {
199    fn as_ref(&self) -> &str {
200        self.as_str()
201    }
202}
203
204impl From<Url> for Uri {
205    fn from(url: Url) -> Self {
206        Self { url }
207    }
208}
209
210impl TryFrom<String> for Uri {
211    type Error = ErrorKind;
212
213    fn try_from(s: String) -> Result<Self> {
214        Uri::try_from(s.as_ref())
215    }
216}
217
218impl TryFrom<&str> for Uri {
219    type Error = ErrorKind;
220
221    /// Create a new URI from a string
222    ///
223    /// Note:
224    /// We do not handle relative URLs here, as we do not know the base URL.
225    /// Furthermore paths also cannot be resolved, as we do not know the file system.
226    ///
227    /// # Errors
228    ///
229    /// Returns an error if the string is not a valid URI
230    ///
231    fn try_from(s: &str) -> Result<Self> {
232        // Empty strings are accepted when being parsed with `Url::parse`,
233        // but we don't want to accept them because there is no clear definition
234        // of "validity" in this case.
235        if s.is_empty() {
236            return Err(ErrorKind::EmptyUrl);
237        }
238
239        match Url::parse(s) {
240            Ok(uri) => Ok(uri.into()),
241            Err(err) => {
242                // This could be a relative URL or a mail address or something
243                // else entirely. Try the mail address check first, as it's the
244                // most common case. Note that we use a relatively weak check
245                // here because
246                // - `fast_chemail::parse_email` does not accept parameters
247                //   (`foo@example?subject=bar`), which are common for website
248                //   contact forms
249                // - `check_if_email_exists` does additional spam detection,
250                //   which we only want to execute when checking the email
251                //   addresses, but not when printing all links with `--dump`.
252                if EmailAddress::is_valid(s) {
253                    // Use the `mailto:` scheme for mail addresses,
254                    // which will allow `Url::parse` to parse them.
255                    if let Ok(uri) = Url::parse(&format!("mailto:{s}")) {
256                        return Ok(uri.into());
257                    }
258                }
259
260                // We do not handle relative URLs here, as we do not know the base URL.
261                Err(ErrorKind::ParseUrl(err, s.to_owned()))
262            }
263        }
264    }
265}
266
267impl TryFrom<RawUri> for Uri {
268    type Error = ErrorKind;
269
270    fn try_from(raw_uri: RawUri) -> Result<Self> {
271        let s = raw_uri.text;
272        Uri::try_from(s.as_ref())
273    }
274}
275
276impl Display for Uri {
277    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
278        f.write_str(self.as_str())
279    }
280}
281
#[cfg(test)]
mod tests {
    //! Unit tests for `Uri`: parsing from strings and the scheme / host
    //! classification helpers.

    use super::*;
    use std::{
        convert::TryFrom,
        net::{IpAddr, Ipv4Addr, Ipv6Addr},
    };
    use test_utils::mail;
    use test_utils::website;

    #[test]
    fn test_ipv4_uri_is_loopback() {
        // The whole `127.0.0.0/8` block counts as loopback, not just `127.0.0.1`
        let uri = Uri::try_from("http://127.0.0.0").unwrap();
        assert!(uri.is_loopback());
    }

    #[test]
    fn test_ipv6_uri_is_loopback() {
        let uri = Uri::try_from("https://[::1]").unwrap();
        assert!(uri.is_loopback());
    }

    #[test]
    fn test_uri_is_private() {
        assert!(Uri::try_from("http://192.168.1.1").unwrap().is_private());
        assert!(Uri::try_from("http://[fd00::1]").unwrap().is_private());
        // Domains are never classified as private
        assert!(!Uri::try_from("https://example.com").unwrap().is_private());
    }

    #[test]
    fn test_uri_is_link_local() {
        assert!(Uri::try_from("http://169.254.0.1").unwrap().is_link_local());
        assert!(Uri::try_from("http://[fe80::1]").unwrap().is_link_local());
        // Domains are never classified as link-local
        assert!(!Uri::try_from("https://example.com").unwrap().is_link_local());
    }

    #[test]
    fn test_uri_from_url() {
        assert!(Uri::try_from("").is_err());
        assert_eq!(
            Uri::try_from("https://example.com"),
            Ok(website!("https://example.com"))
        );
        assert_eq!(
            Uri::try_from("https://example.com/@test/testing"),
            Ok(website!("https://example.com/@test/testing"))
        );
    }

    #[test]
    fn test_uri_from_email_str() {
        assert_eq!(
            Uri::try_from("mail@example.com"),
            Ok(mail!("mail@example.com"))
        );
        assert_eq!(
            Uri::try_from("mailto:mail@example.com"),
            Ok(mail!("mail@example.com"))
        );
        assert_eq!(
            Uri::try_from("mail@example.com?foo=bar"),
            Ok(mail!("mail@example.com?foo=bar"))
        );
    }

    #[test]
    fn test_uri_tel() {
        // Check concrete properties of the parsed URI. Comparing the parse
        // result with a second parse of the same input could never fail.
        let uri = Uri::try_from("tel:1234567890").unwrap();
        assert!(uri.is_tel());
        assert!(!uri.is_mail());
        assert_eq!(uri.scheme(), "tel");
        assert_eq!(uri.as_str(), "tel:1234567890");
    }

    #[test]
    fn test_uri_host_ip_v4() {
        assert_eq!(
            website!("http://127.0.0.1").host_ip(),
            Some(IpAddr::V4(Ipv4Addr::LOCALHOST))
        );
    }

    #[test]
    fn test_uri_host_ip_v6() {
        assert_eq!(
            website!("https://[2020::0010]").host_ip(),
            Some(IpAddr::V6(Ipv6Addr::new(0x2020, 0, 0, 0, 0, 0, 0, 0x10)))
        );
    }

    #[test]
    fn test_uri_host_ip_no_ip() {
        assert!(website!("https://some.cryptic/url").host_ip().is_none());
    }

    #[test]
    fn test_localhost() {
        let uri = website!("http://127.0.0.1");
        assert_eq!(uri.host_ip(), Some(IpAddr::V4(Ipv4Addr::LOCALHOST)));
        // Localhost must also be recognized as a loopback address
        assert!(uri.is_loopback());
    }

    #[test]
    fn test_convert_to_https() {
        assert_eq!(
            website!("http://example.com").to_https().unwrap(),
            website!("https://example.com")
        );

        // Converting an `https` URI again leaves it unchanged
        assert_eq!(
            website!("https://example.com").to_https().unwrap(),
            website!("https://example.com")
        );
    }

    #[test]
    fn test_file_uri() {
        assert!(Uri::try_from("file:///path/to/file").unwrap().is_file());
    }
}