async_coap_uri/
uri_raw_components.rs

// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
15
16use super::*;
17
18use std::fmt::{Display, Formatter};
19use std::str::FromStr;
20
/// The individual components of a parsed URI-reference.
///
/// Every component is stored in its raw (percent-encoded) form as a slice
/// borrowed for `'a`. Accessors on this type expose both the raw slices and
/// decoded/unescaped views of them.
///
/// Values are normally obtained by calling the `components()` method of the
/// URI type you are working with.
///
/// Note that this struct implements [`AnyUriRef`], so it can be passed
/// wherever an [`AnyUriRef`] is accepted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct UriRawComponents<'a> {
    /// Scheme, without the trailing `:`.
    pub(crate) scheme: Option<&'a str>,
    /// Whole authority, without the leading `//`.
    pub(crate) authority: Option<&'a str>,
    /// Userinfo portion extracted from `authority`.
    pub(crate) userinfo: Option<&'a str>,
    /// Host portion extracted from `authority`.
    pub(crate) host: Option<&'a str>,
    /// Port extracted from `authority`, when it parses as a `u16`.
    pub(crate) port: Option<u16>,
    /// Path; always present, but possibly empty.
    pub(crate) path: &'a str,
    /// Query, without the leading `?`.
    pub(crate) query: Option<&'a str>,
    /// Fragment, without the leading `#`.
    pub(crate) fragment: Option<&'a str>,
}
42
43impl AnyUriRef for UriRawComponents<'_> {
44    /// Note that the implementation of this method for [`UriRawComponents`] ignores
45    /// the value of `self.userinfo`, `self.host`, and `self.port`; instead relying entirely
46    /// on `self.authority`.
47    fn write_to<T: core::fmt::Write + ?Sized>(&self, f: &mut T) -> Result<(), core::fmt::Error> {
48        // Note that everything in `self` is already escaped, so we
49        // don't need to do that here.
50        if let Some(scheme) = self.scheme {
51            f.write_str(scheme)?;
52            f.write_char(':')?;
53        }
54
55        if let Some(authority) = self.authority {
56            f.write_str("//")?;
57            f.write_str(authority)?;
58        }
59
60        f.write_str(self.path)?;
61
62        if let Some(query) = self.query {
63            f.write_char('?')?;
64            f.write_str(query)?;
65        }
66
67        if let Some(fragment) = self.fragment {
68            f.write_char('#')?;
69            f.write_str(fragment)?;
70        }
71
72        Ok(())
73    }
74
75    fn is_empty(&self) -> bool {
76        self.scheme.is_none()
77            && self.authority.is_none()
78            && self.path.is_empty()
79            && self.query.is_none()
80            && self.fragment.is_none()
81    }
82
83    fn components(&self) -> UriRawComponents<'_> {
84        self.clone()
85    }
86
87    fn uri_type(&self) -> UriType {
88        if self.authority.is_some() {
89            if self.scheme.is_some() {
90                return UriType::Uri;
91            } else {
92                return UriType::NetworkPath;
93            }
94        } else if self.scheme.is_some() {
95            if self.path.starts_with('/') {
96                return UriType::UriNoAuthority;
97            } else {
98                return UriType::UriCannotBeABase;
99            }
100        } else if self.path.starts_with('/') {
101            return UriType::AbsolutePath;
102        } else if self.path.is_empty() {
103            if self.query.is_some() {
104                return UriType::Query;
105            } else if self.fragment.is_some() {
106                return UriType::Fragment;
107            }
108        }
109
110        return UriType::RelativePath;
111    }
112}
113
114impl Display for UriRawComponents<'_> {
115    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
116        self.write_to(f)
117    }
118}
119
120impl From<UriRawComponents<'_>> for String {
121    fn from(comp: UriRawComponents<'_>) -> Self {
122        String::from(&comp)
123    }
124}
125
126impl From<&UriRawComponents<'_>> for String {
127    fn from(comp: &UriRawComponents<'_>) -> Self {
128        comp.to_string()
129    }
130}
131
132impl<'a> UriRawComponents<'a> {
133    /// Constructs a new `UriRawComponents` from the given string slice, which is assumed
134    /// to contain a URI-reference.
135    pub fn from_str(uri: &'a str) -> Result<UriRawComponents<'a>, ParseError> {
136        if let Some(i) = uri.unescape_uri().first_error() {
137            return Err(ParseError::new(
138                "Bad percent encoding or illegal characters",
139                Some(i..i + 1),
140            ));
141        }
142
143        let captures = match RFC3986_APPENDIX_B.captures(uri) {
144            Some(x) => x,
145            None => {
146                return Err(ParseError::new(
147                    "Cannot find URI components",
148                    Some(0..uri.len()),
149                ));
150            }
151        };
152
153        let scheme = if let Some(x) = captures.get(2) {
154            // Do an additional syntax check on the scheme to make sure it is valid.
155            if URI_CHECK_SCHEME.captures(x.as_str()).is_some() {
156                Some(x.as_str())
157            } else {
158                return Err(ParseError::new(
159                    "Invalid URI scheme",
160                    Some(x.start()..x.end()),
161                ));
162            }
163        } else {
164            None
165        };
166
167        let authority = captures.get(4).map(|x| x.as_str());
168        let query = captures.get(7).map(|x| x.as_str());
169        let fragment = captures.get(9).map(|x| x.as_str());
170
171        // Unwrap safety: Capture 5 is not an optional capture in the regex.
172        let path = captures.get(5).unwrap().as_str();
173
174        unsafe {
175            Ok(UriRawComponents::from_components_unchecked(
176                scheme, authority, path, query, fragment,
177            ))
178        }
179    }
180
181    #[inline(always)]
182    /// Returns the slice of the URI that describes the URI scheme, if present.
183    /// Percent encoding is not allowed in the scheme, so no decoding is required.
184    ///
185    pub fn scheme(&self) -> Option<&'a str> {
186        self.scheme
187    }
188
189    /// Returns the escaped slice of the URI that contains the "authority", if present.
190    ///
191    /// See [`UriRawComponents::authority`] for the percent-decoded version.
192    #[inline(always)]
193    pub fn raw_authority(&self) -> Option<&'a str> {
194        self.authority
195    }
196
197    /// Returns the escaped slice of the URI that contains the "userinfo", if present.
198    ///
199    /// See [`UriRawComponents::userinfo`] for the percent-decoded version.
200    #[inline(always)]
201    pub fn raw_userinfo(&self) -> Option<&'a str> {
202        self.userinfo
203    }
204
205    /// Returns the escaped slice of the URI that contains the "host", if present.
206    ///
207    /// See [`UriRawComponents::host`] for the percent-decoded version.
208    #[inline(always)]
209    pub fn raw_host(&self) -> Option<&'a str> {
210        self.host
211    }
212
213    /// Returns the 16-bit representation of the port number, if present in the authority.
214    #[inline(always)]
215    pub fn port(&self) -> Option<u16> {
216        self.port
217    }
218
219    /// Returns the escaped slice of the URI that contains the "path".
220    ///
221    /// See [`UriRawComponents::path`] for the percent-decoded version.
222    #[inline(always)]
223    pub fn raw_path(&self) -> &'a str {
224        self.path
225    }
226
227    /// Returns the subset of this URI that is a path, without the
228    /// scheme, authority, query, or fragment. Since this is itself
229    /// a valid relative URI, it returns a `&RelRef`.
230    pub fn path_as_rel_ref(&self) -> &'a RelRef {
231        unsafe { RelRef::from_str_unchecked(self.raw_path()) }
232    }
233
234    /// Returns the escaped substring of the URI that contains the "query", if present.
235    ///
236    /// See [`StrExt`] for details on unescaping the results.
237    #[inline(always)]
238    pub fn raw_query(&self) -> Option<&'a str> {
239        self.query
240    }
241
242    /// Returns the escaped substring of the URI that contains the "fragment", if present.
243    ///
244    /// See [`UriRawComponents::fragment`] for the percent-decoded version.
245    #[inline(always)]
246    pub fn raw_fragment(&self) -> Option<&'a str> {
247        self.fragment
248    }
249
250    /// An iterator which returns each individual *escaped* path item.
251    ///
252    /// See [`UriRawComponents::path_segments`] for the percent-decoded version.
253    pub fn raw_path_segments(&self) -> impl Iterator<Item = &'a str> {
254        if self.path.is_empty() {
255            let mut ret = "".split('/');
256            let _ = ret.next();
257            return ret;
258        } else {
259            self.path.split('/')
260        }
261    }
262
263    /// An iterator which returns each individual *escaped* query item.
264    ///
265    /// See [`UriRawComponents::query_items`] for the percent-decoded version.
266    pub fn raw_query_items(&self) -> impl Iterator<Item = &'a str> {
267        let pattern = |c| c == '&' || c == ';';
268        match self.query {
269            Some(query) => query.split(pattern),
270            None => {
271                let mut ret = "".split(pattern);
272                let _ = ret.next();
273                return ret;
274            }
275        }
276    }
277
278    /// An iterator which returns each individual *escaped* query item as a
279    /// key/value pair. Note that neither are unescaped.
280    ///
281    /// See [`UriRawComponents::query_key_values`] for the percent-decoded version.
282    pub fn raw_query_key_values(&self) -> impl Iterator<Item = (&'a str, &'a str)> {
283        self.raw_query_items().map(|comp| match comp.find('=') {
284            Some(x) => comp.split_at(x),
285            None => (comp, ""),
286        })
287    }
288
289    /// Unescaped (percent-decoded) version of [`UriRawComponents::raw_fragment`], using
290    /// `std::borrow::Cow<str>` instead of `&str`.
291    pub fn fragment(&self) -> Option<Cow<'_, str>> {
292        self.raw_fragment().map(|f| f.unescape_uri().to_cow())
293    }
294
295    /// Unescaped (percent-decoded) version of [`UriRawComponents::raw_host`], using
296    /// `std::borrow::Cow<str>` instead of `&str`.
297    pub fn host(&self) -> Option<Cow<'_, str>> {
298        self.raw_host().map(|f| f.unescape_uri().to_cow())
299    }
300
301    /// Unescaped (percent-decoded) version of [`UriRawComponents::raw_authority`], using
302    /// `std::borrow::Cow<str>` instead of `&str`.
303    pub fn authority(&self) -> Option<Cow<'_, str>> {
304        self.raw_authority().map(|f| f.unescape_uri().to_cow())
305    }
306
307    /// Unescaped (percent-decoded) version of [`UriRawComponents::raw_userinfo`], using
308    /// `std::borrow::Cow<str>` instead of `&str`.
309    pub fn userinfo(&self) -> Option<Cow<'_, str>> {
310        self.raw_userinfo().map(|f| f.unescape_uri().to_cow())
311    }
312
313    /// Unescaped (percent-decoded) version of [`UriRawComponents::raw_query`], using
314    /// `std::borrow::Cow<str>` instead of `&str`.
315    pub fn query(&self) -> Option<Cow<'_, str>> {
316        self.raw_query().map(|f| f.unescape_uri().to_cow())
317    }
318
319    /// Unescaped (percent-decoded) version of [`UriRawComponents::raw_path_segments`], using
320    /// `std::borrow::Cow<str>` instead of `&str`.
321    pub fn path_segments(&self) -> impl Iterator<Item = Cow<'_, str>> {
322        self.raw_path_segments()
323            .map(|item| item.unescape_uri().to_cow())
324    }
325
326    /// Unescaped (percent-decoded) version of [`UriRawComponents::raw_query_items`], using
327    /// `std::borrow::Cow<str>` instead of `&str`.
328    pub fn query_items(&self) -> impl Iterator<Item = Cow<'_, str>> {
329        self.raw_query_items()
330            .map(|item| item.unescape_uri().to_cow())
331    }
332
333    /// Unescaped (percent-decoded) version of [`UriRawComponents::raw_query_key_values`], using
334    /// `std::borrow::Cow<str>` instead of `&str`.
335    pub fn query_key_values(&self) -> impl Iterator<Item = (Cow<'_, str>, Cow<'_, str>)> {
336        self.raw_query_key_values().map(|item| {
337            (
338                item.0.unescape_uri().to_cow(),
339                item.1.unescape_uri().to_cow(),
340            )
341        })
342    }
343
344    /// Returns a `UriRawComponents` with any leading dot-slashes trimmed from the path.
345    #[must_use]
346    pub fn trim_leading_dot_slashes(&self) -> Self {
347        UriRawComponents {
348            path: self.path_as_rel_ref().trim_leading_dot_slashes(),
349            ..self.clone()
350        }
351    }
352
353    /// Returns a `UriRawComponents` with the query and fragment cleared.
354    #[must_use]
355    pub fn trim_query(&self) -> Self {
356        UriRawComponents {
357            query: None,
358            fragment: None,
359            ..self.clone()
360        }
361    }
362
363    /// Returns a `UriRawComponents` with the query cleared.
364    #[must_use]
365    pub fn trim_fragment(&self) -> Self {
366        UriRawComponents {
367            fragment: None,
368            ..self.clone()
369        }
370    }
371}
372
373impl<'a> UriRawComponents<'a> {
374    /// Constructs a new `UriRawComponents` from the given raw, percent-encoded components,
375    /// without checking that the components are valid.
376    ///
377    /// This method is unsafe because the components are not checked to ensure they are valid.
378    pub unsafe fn from_components_unchecked(
379        scheme: Option<&'a str>,
380        authority: Option<&'a str>,
381        path: &'a str,
382        query: Option<&'a str>,
383        fragment: Option<&'a str>,
384    ) -> UriRawComponents<'a> {
385        let userinfo;
386        let host;
387        let port;
388
389        if let Some(authority) = authority {
390            match URI_AUTHORITY.captures(authority) {
391                Some(y) => {
392                    userinfo = if let Some(x) = y.get(2) {
393                        Some(x.as_str())
394                    } else {
395                        None
396                    };
397                    host = if let Some(x) = y.get(3) {
398                        Some(x.as_str())
399                    } else {
400                        None
401                    };
402                    port = if let Some(x) = y.get(5) {
403                        u16::from_str(x.as_str()).ok()
404                    } else {
405                        None
406                    };
407                }
408                None => {
409                    userinfo = None;
410                    host = None;
411                    port = None;
412                }
413            }
414        } else {
415            userinfo = None;
416            host = None;
417            port = None;
418        };
419
420        UriRawComponents {
421            scheme,
422            authority,
423            userinfo,
424            host,
425            port,
426            path,
427            query,
428            fragment,
429        }
430    }
431}
432
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the components of a URI-reference that is expected to have a
    /// scheme but no port, userinfo, query, or fragment.
    fn assert_plain_components(
        components: UriRawComponents<'_>,
        cannot_be_a_base: bool,
        scheme: &str,
        raw_host: Option<&str>,
        raw_path: &str,
    ) {
        assert_eq!(cannot_be_a_base, components.uri_type().cannot_be_a_base());
        assert_eq!(Some(scheme), components.scheme());
        assert_eq!(raw_host, components.raw_host());
        assert_eq!(None, components.port());
        assert_eq!(raw_path, components.raw_path());
        assert_eq!(None, components.raw_userinfo());
        assert_eq!(None, components.raw_fragment());
        assert_eq!(None, components.raw_query());
    }

    #[test]
    fn components() {
        // URI-reference created through the `uri_ref!` macro.
        {
            let uri = uri_ref!("http://example.com/");
            assert_plain_components(
                uri.components(),
                false,
                "http",
                Some("example.com"),
                "/",
            );
        }

        // The same URI, parsed at runtime into an owned buffer.
        {
            let uri = UriRefBuf::from_str("http://example.com/").unwrap();
            let uri_ref = uri.as_uri_ref();
            assert_plain_components(
                uri_ref.components(),
                false,
                "http",
                Some("example.com"),
                "/",
            );
        }

        // A URI with a scheme, no authority, and a path without a leading slash.
        {
            let uri = UriRefBuf::from_str("mailto:fred@example.com").unwrap();
            let uri_ref = uri.as_uri_ref();
            assert_plain_components(
                uri_ref.components(),
                true,
                "mailto",
                None,
                "fred@example.com",
            );
        }

        // Each row: (input, expected raw path segments, expected raw query
        // items, expected path+query as a relative reference).
        let component_test_table = vec![
            (
                "http://goo.gl/a/b/c/d?query",
                vec!["", "a", "b", "c", "d"],
                vec!["query"],
                rel_ref!("/a/b/c/d?query"),
            ),
            (
                "http://goo.gl/a/b/c/d",
                vec!["", "a", "b", "c", "d"],
                vec![],
                rel_ref!("/a/b/c/d"),
            ),
            (
                "http://goo.gl/a/b/c/d/",
                vec!["", "a", "b", "c", "d", ""],
                vec![],
                rel_ref!("/a/b/c/d/"),
            ),
            (
                "/a/b/c/d/",
                vec!["", "a", "b", "c", "d", ""],
                vec![],
                rel_ref!("/a/b/c/d/"),
            ),
            (
                "a/b/c/d/",
                vec!["a", "b", "c", "d", ""],
                vec![],
                rel_ref!("a/b/c/d/"),
            ),
            (
                "a/b//c/d/",
                vec!["a", "b", "", "c", "d", ""],
                vec![],
                rel_ref!("a/b//c/d/"),
            ),
            (
                "a/b/c/d/?",
                vec!["a", "b", "c", "d", ""],
                vec![""],
                rel_ref!("a/b/c/d/?"),
            ),
            (
                "a?b=1;c=2;d=3",
                vec!["a"],
                vec!["b=1", "c=2", "d=3"],
                rel_ref!("a?b=1;c=2;d=3"),
            ),
            (
                "a?b=1&c=2&d=3",
                vec!["a"],
                vec!["b=1", "c=2", "d=3"],
                rel_ref!("a?b=1&c=2&d=3"),
            ),
            (
                "a/b/%47/d/",
                vec!["a", "b", "%47", "d", ""],
                vec![],
                rel_ref!("a/b/%47/d/"),
            ),
        ];

        for (input, expected_segments, expected_query_items, expected_rel) in component_test_table
        {
            let uri = UriRef::from_str(input).unwrap();
            let components = uri.components();

            let actual_segments: Vec<&str> = components.raw_path_segments().collect();
            let actual_query_items: Vec<&str> = components.raw_query_items().collect();
            let actual_rel: &RelRef = uri.path_query_as_rel_ref();

            assert_eq!(expected_segments, actual_segments);
            assert_eq!(expected_query_items, actual_query_items);
            assert_eq!(
                expected_rel, actual_rel,
                "Expected <{}>, Found <{}> (Item: <{}>)",
                expected_rel, actual_rel, input
            );
        }
    }
}
558}