uriparse/
path.rs

1//! Path Component
2//!
3//! See [[RFC3986, Section 3.3](https://tools.ietf.org/html/rfc3986#section-3.3)].
4
5use std::borrow::Cow;
6use std::convert::{Infallible, TryFrom};
7use std::error::Error;
8use std::fmt::{self, Display, Formatter, Write};
9use std::hash::{Hash, Hasher};
10use std::ops::Deref;
11use std::str;
12
13use crate::utility::{
14    get_percent_encoded_value, normalize_string, percent_encoded_equality, percent_encoded_hash,
15    UNRESERVED_CHAR_MAP,
16};
17
18/// A map of byte characters that determines if a character is a valid path character.
///
/// The table is indexed by the byte value: each row is the high nibble and each column the low
/// nibble (see the row/column labels in the comments). An entry of `0` marks a byte that is not
/// accepted as a path character; every other entry holds the byte itself.
///
/// Note that `b'/'` (`0x2F`) maps to `0` while `b'%'` (`0x25`) is non-zero.
/// NOTE(review): presumably the segment delimiter and the validation of percent-encodings are
/// handled by the parser, which is not visible here -- confirm against `parse_path`.
19#[rustfmt::skip]
20const PATH_CHAR_MAP: [u8; 256] = [
21 // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
22    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 0
23    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 1
24    0, b'!',    0,    0, b'$', b'%', b'&',b'\'', b'(', b')', b'*', b'+', b',', b'-', b'.',    0, // 2
25 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';',    0, b'=',    0,    0, // 3
26 b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', // 4
27 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z',    0,    0,    0,    0, b'_', // 5
28    0, b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', // 6
29 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',    0,    0,    0, b'~',    0, // 7
30    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 8
31    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 9
32    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // A
33    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // B
34    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // C
35    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // D
36    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // E
37    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // F
38];
39
40/// The path component as defined in
41/// [[RFC3986, Section 3.3](https://tools.ietf.org/html/rfc3986#section-3.3)].
42///
43/// A path is composed of a sequence of segments. It is also either absolute or relative, where an
44/// absolute path starts with a `'/'`. A URI with an authority *always* has an absolute path
45/// regardless of whether the path was empty (i.e. "http://example.com" has a single empty
46/// path segment and is absolute).
47///
48/// Each segment in the path is case-sensitive. Furthermore, percent-encoding plays no role in
49/// equality checking for characters in the unreserved character set meaning that `"segment"` and
50/// `"s%65gment"` are identical. Both of these attributes are reflected in the equality and hash
51/// functions.
52///
53/// However, be aware that just because percent-encoding plays no role in equality checking does not
54/// mean that either the path or a given segment is normalized. If the path or a segment needs to be
55/// normalized, use either the [`Path::normalize`] or [`Segment::normalize`] functions,
56/// respectively.
57#[derive(Clone, Debug, Eq)]
58#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
59pub struct Path<'path> {
60    /// whether the path is absolute. Specifically, a path is absolute if it starts with a
61    /// `'/'`.
62    absolute: bool,
63
64    /// The total number of double dot segments in the path.
65    double_dot_segment_count: u16,
66
67    /// The number of double dot segments consecutive from the beginning of the path.
68    leading_double_dot_segment_count: u16,
69
70    /// The sequence of segments that compose the path.
71    segments: Vec<Segment<'path>>,
72
73    /// The total number of single dot segments in the path.
74    single_dot_segment_count: u16,
75
76    /// The total number of unnormalized segments in the path.
77    unnormalized_count: u16,
78}
79
80impl<'path> Path<'path> {
81    /// Clears all segments from the path leaving a single empty segment.
82    ///
83    /// # Examples
84    ///
85    /// ```
86    /// use std::convert::TryFrom;
87    ///
88    /// use uriparse::Path;
89    ///
90    /// let mut path = Path::try_from("/my/path").unwrap();
91    /// assert_eq!(path, "/my/path");
92    /// path.clear();
93    /// assert_eq!(path, "/");
94    /// ```
95    pub fn clear(&mut self) {
96        self.segments.clear();
97        self.segments.push(Segment::empty());
98    }
99
100    /// Converts the [`Path`] into an owned copy.
101    ///
102    /// If you construct the path from a source with a non-static lifetime, you may run into
103    /// lifetime problems due to the way the struct is designed. Calling this function will ensure
104    /// that the returned value has a static lifetime.
105    ///
106    /// This is different from just cloning. Cloning the path will just copy the references, and
107    /// thus the lifetime will remain the same.
108    pub fn into_owned(self) -> Path<'static> {
109        let segments = self
110            .segments
111            .into_iter()
112            .map(Segment::into_owned)
113            .collect::<Vec<Segment<'static>>>();
114
115        Path {
116            absolute: self.absolute,
117            double_dot_segment_count: self.double_dot_segment_count,
118            leading_double_dot_segment_count: self.leading_double_dot_segment_count,
119            segments,
120            single_dot_segment_count: self.single_dot_segment_count,
121            unnormalized_count: self.unnormalized_count,
122        }
123    }
124
125    /// Returns whether the path is absolute (i.e. it starts with a `'/'`).
126    ///
127    /// Any path following an [`Authority`] will *always* be parsed to be absolute.
128    ///
129    /// # Examples
130    ///
131    /// ```
132    /// use std::convert::TryFrom;
133    ///
134    /// use uriparse::Path;
135    ///
136    /// let path = Path::try_from("/my/path").unwrap();
137    /// assert_eq!(path.is_absolute(), true);
138    /// ```
139    pub fn is_absolute(&self) -> bool {
140        self.absolute
141    }
142
143    /// Returns whether the path is normalized either as or as not a reference.
144    ///
145    /// See [`Path::normalize`] for a full description of what path normalization entails.
146    ///
147    /// Although this function does not operate in constant-time in general, it will be
148    /// constant-time in the vast majority of cases.
149    ///
150    ///
151    ///
152    /// # Examples
153    ///
154    /// ```
155    /// use std::convert::TryFrom;
156    ///
157    /// use uriparse::Path;
158    ///
159    /// let path = Path::try_from("/my/path").unwrap();
160    /// assert!(path.is_normalized(false));
161    ///
162    /// let path = Path::try_from("/my/p%61th").unwrap();
163    /// assert!(!path.is_normalized(false));
164    ///
165    /// let path = Path::try_from("..").unwrap();
166    /// assert!(path.is_normalized(true));
167    ///
168    /// let path = Path::try_from("../.././.").unwrap();
169    /// assert!(!path.is_normalized(true));
170    /// ```
171    pub fn is_normalized(&self, as_reference: bool) -> bool {
172        if self.unnormalized_count != 0 {
173            return false;
174        }
175
176        if self.absolute || !as_reference {
177            self.single_dot_segment_count == 0 && self.double_dot_segment_count == 0
178        } else {
179            (self.single_dot_segment_count == 0
180                || (self.single_dot_segment_count == 1
181                    && self.segments[0].is_single_dot_segment()
182                    && self.segments.len() > 1
183                    && self.segments[1].contains(':')))
184                && self.double_dot_segment_count == self.leading_double_dot_segment_count
185        }
186    }
187
188    /// Returns whether the path is relative (i.e. it does not start with a `'/'`).
189    ///
190    /// Any path following an [`Authority`] will *always* be parsed to be absolute.
191    ///
192    /// # Examples
193    ///
194    /// ```
195    /// use std::convert::TryFrom;
196    ///
197    /// use uriparse::Path;
198    ///
199    /// let path = Path::try_from("my/path").unwrap();
200    /// assert_eq!(path.is_relative(), true);
201    /// ```
202    pub fn is_relative(&self) -> bool {
203        !self.absolute
204    }
205
206    /// Creates a path with no segments on it.
207    ///
208    /// This is only used to avoid allocations for temporary paths. Any path created using this
209    /// function is **not** valid!
210    pub(crate) unsafe fn new_with_no_segments(absolute: bool) -> Path<'static> {
211        Path {
212            absolute,
213            double_dot_segment_count: 0,
214            leading_double_dot_segment_count: 0,
215            segments: Vec::new(),
216            single_dot_segment_count: 0,
217            unnormalized_count: 0,
218        }
219    }
220
221    /// Normalizes the path and all of its segments.
222    ///
223    /// There are two components to path normalization, the normalization of each segment
224    /// individually and the removal of unnecessary dot segments. It is also guaranteed that whether
225    /// the path is absolute will not change as a result of normalization.
226    ///
227    /// The normalization of each segment will proceed according to [`Segment::normalize`].
228    ///
229    /// If the path is absolute (i.e., it starts with a `'/'`), then `as_reference` will be set to
230    /// `false` regardless of its set value.
231    ///
232    /// If `as_reference` is `false`, then all dot segments will be removed as they would be if you
233    /// had called [`Path::remove_dot_segments`]. Otherwise, when a dot segment is removed is
234    /// dependent on whether it's `"."` or `".."` and its location in the path.
235    ///
236    /// In general, `"."` dot segments are always removed except for when it is at the beginning of
237    /// the path and is followed by a segment containing a `':'`, e.g. `"./a:b"` stays the same.
238    ///
239    /// For `".."` dot segments, they are kept whenever they are at the beginning of the path and
240    /// removed whenever they are not, e.g. `"a/../.."` normalizes to `".."`.
241    pub fn normalize(&mut self, as_reference: bool) {
        // Nothing to do when the cached counts already report a normalized path.
242        if self.is_normalized(as_reference) {
243            return;
244        }
245
        // Every segment that is kept below gets normalized, so the count can be reset up front.
246        self.unnormalized_count = 0;
247
        // Absolute paths, and paths not treated as references, lose all of their dot segments.
248        if self.absolute || !as_reference {
249            self.remove_dot_segments_helper(true);
250            return;
251        }
252
        // The segments are compacted in place: kept segments are swapped towards the front and
        // `new_length` is the number of segments kept so far.
        // `double_dot_segment_count` counts the `".."` segments that are kept, and
        // `last_dot_segment` is the output index of the most recently kept `".."`.
253        let mut double_dot_segment_count = 0;
254        let mut last_dot_segment = None;
255        let mut new_length = 0;
256
257        for i in 0..self.segments.len() {
258            let segment = &self.segments[i];
259
            // A `"."` is dropped unless it would be the very first kept segment and the segment
            // following it in the input contains a `':'`.
260            if segment.is_single_dot_segment()
261                && (new_length > 0
262                    || i == self.segments.len() - 1
263                    || !self.segments[i + 1].as_str().contains(':'))
264            {
265                continue;
266            }
267
268            if segment.is_double_dot_segment() {
269                match last_dot_segment {
                    // Nothing has been kept yet: this `".."` starts the leading run and is kept.
270                    None if new_length == 0 => (),
                    // The previously kept segment is itself a kept `".."`: extend the leading run.
271                    Some(index) if index == new_length - 1 => (),
                    // Otherwise the `".."` cancels out the previously kept segment.
272                    _ => {
                        // If the output is exactly a leading `"."` plus one segment, and no
                        // segment containing a `':'` follows in the input, the leading `"."` is
                        // dropped together with the cancelled segment.
273                        if new_length == 2
274                            && self.segments[0].is_single_dot_segment()
275                            && (i == self.segments.len() - 1
276                                || !self.segments[i + 1].as_str().contains(':'))
277                        {
278                            new_length -= 1
279                        }
280
281                        new_length -= 1;
282
283                        continue;
284                    }
285                }
286
287                double_dot_segment_count += 1;
288                last_dot_segment = Some(new_length);
289            }
290
            // Keep the segment: move it to the next output slot and normalize it there.
291            self.segments.swap(i, new_length);
292            self.segments[new_length].normalize();
293            new_length += 1;
294        }
295
        // A path always has at least one segment, so an empty result becomes one empty segment.
296        if new_length == 0 {
297            self.segments[0] = Segment::empty();
298            new_length = 1;
299        }
300
        // Only leading `".."` segments were kept, so the total and the leading counts coincide. A
        // `"."` can only have been kept as the first segment.
301        self.double_dot_segment_count = double_dot_segment_count;
302        self.leading_double_dot_segment_count = double_dot_segment_count;
303        self.single_dot_segment_count = if self.segments[0].is_single_dot_segment() {
304            1
305        } else {
306            0
307        };
308
        // Drop everything past the compacted output.
309        self.segments.truncate(new_length);
310    }
311
312    /// Pops the last segment off of the path.
313    ///
314    /// If the path only contains one segment, then that segment will become empty.
315    ///
316    /// ```
317    /// use std::convert::TryFrom;
318    ///
319    /// use uriparse::Path;
320    ///
321    /// let mut path = Path::try_from("/my/path").unwrap();
322    /// path.pop();
323    /// assert_eq!(path, "/my");
324    /// path.pop();
325    /// assert_eq!(path, "/");
326    /// ```
327    pub fn pop(&mut self) {
328        let segment = self.segments.pop().unwrap();
329
330        if segment.is_single_dot_segment() {
331            self.single_dot_segment_count =
332                self.single_dot_segment_count.checked_sub(1).unwrap_or(0);
333        }
334
335        if segment.is_double_dot_segment() {
336            self.double_dot_segment_count =
337                self.double_dot_segment_count.checked_sub(1).unwrap_or(0);
338
339            if self.double_dot_segment_count < self.leading_double_dot_segment_count {
340                self.leading_double_dot_segment_count -= 1;
341            }
342        }
343
344        if !segment.is_normalized() {
345            self.unnormalized_count = self.unnormalized_count.checked_sub(1).unwrap_or(0);
346        }
347
348        if self.segments.is_empty() {
349            self.segments.push(Segment::empty());
350        }
351    }
352
353    /// Pushes a segment onto the path.
354    ///
355    /// If the conversion to a [`Segment`] fails, an [`InvalidPath`] will be returned.
356    ///
357    /// The behavior of this function is different if the current path is just one empty segment. In
358    /// this case, the pushed segment will replace that empty segment unless the pushed segment is
359    /// itself empty.
360    ///
361    /// ```
362    /// use std::convert::TryFrom;
363    ///
364    /// use uriparse::Path;
365    ///
366    /// let mut path = Path::try_from("/my/path").unwrap();
367    /// path.push("test");
368    /// assert_eq!(path, "/my/path/test");
369    ///
370    /// let mut path = Path::try_from("/").unwrap();
371    /// path.push("test");
372    /// assert_eq!(path, "/test");
373    ///
374    /// let mut path = Path::try_from("/").unwrap();
375    /// path.push("");
376    /// assert_eq!(path, "//");
377    /// ```
378    pub fn push<TSegment, TSegmentError>(&mut self, segment: TSegment) -> Result<(), PathError>
379    where
380        Segment<'path>: TryFrom<TSegment, Error = TSegmentError>,
381        PathError: From<TSegmentError>,
382    {
383        if self.segments.len() as u16 == u16::max_value() {
384            return Err(PathError::ExceededMaximumLength);
385        }
386
387        let segment = Segment::try_from(segment)?;
388
389        if segment.is_single_dot_segment() {
390            self.single_dot_segment_count += 1;
391        }
392
393        if segment.is_double_dot_segment() {
394            if self.segments.len() as u16 == self.double_dot_segment_count {
395                self.leading_double_dot_segment_count += 1;
396            }
397
398            self.double_dot_segment_count += 1;
399        }
400
401        if !segment.is_normalized() {
402            self.unnormalized_count += 1;
403        }
404
405        if segment != "" && self.segments.len() == 1 && self.segments[0].as_str().is_empty() {
406            self.segments[0] = segment;
407        } else {
408            self.segments.push(segment);
409        }
410
411        Ok(())
412    }
413
414    /// Removes all dot segments from the path according to the algorithm described in
415    /// [[RFC3986, Section 5.2.4](https://tools.ietf.org/html/rfc3986#section-5.2.4)].
416    ///
417    /// This function will perform no memory allocations during removal of dot segments.
418    ///
419    /// If the path currently has no dot segments, then this function is a no-op.
420    ///
421    /// # Examples
422    ///
423    /// ```
424    /// use std::convert::TryFrom;
425    ///
426    /// use uriparse::Path;
427    ///
428    /// let mut path = Path::try_from("/a/b/c/./../../g").unwrap();
429    /// path.remove_dot_segments();
430    /// assert_eq!(path, "/a/g");
431    /// ```
432    pub fn remove_dot_segments(&mut self) {
433        if self.single_dot_segment_count == 0 && self.double_dot_segment_count == 0 {
434            return;
435        }
436
437        self.remove_dot_segments_helper(false);
438    }
439
440    /// Helper function that removes all dot segments with optional segment normalization.
441    fn remove_dot_segments_helper(&mut self, normalize_segments: bool) {
442        let mut input_absolute = self.absolute;
443        let mut new_length = 0;
444
445        for i in 0..self.segments.len() {
446            let segment = &self.segments[i];
447
448            if input_absolute {
449                if segment.is_single_dot_segment() {
450                    continue;
451                } else if segment.is_double_dot_segment() {
452                    if new_length > 0 {
453                        new_length -= 1;
454                    } else {
455                        self.absolute = false;
456                    }
457
458                    continue;
459                }
460
461                if new_length == 0 {
462                    self.absolute = true;
463                }
464            } else if segment.is_single_dot_segment() || segment.is_double_dot_segment() {
465                continue;
466            }
467
468            self.segments.swap(i, new_length);
469
470            if normalize_segments {
471                self.segments[new_length].normalize();
472            }
473
474            new_length += 1;
475
476            if i < self.segments.len() - 1 {
477                input_absolute = true;
478            } else {
479                input_absolute = false;
480            }
481        }
482
483        if input_absolute {
484            if new_length == 0 {
485                self.absolute = true;
486            } else {
487                self.segments[new_length] = Segment::empty();
488                new_length += 1;
489            }
490        }
491
492        if new_length == 0 {
493            self.segments[0] = Segment::empty();
494            new_length = 1;
495        }
496
497        self.double_dot_segment_count = 0;
498        self.leading_double_dot_segment_count = 0;
499        self.single_dot_segment_count = 0;
500        self.segments.truncate(new_length);
501    }
502
503    /// Returns the segments of the path.
504    ///
505    /// If you require mutability, use [`Path::segments_mut`].
506    ///
507    /// # Examples
508    ///
509    /// ```
510    /// use std::convert::TryFrom;
511    ///
512    /// use uriparse::Path;
513    ///
514    /// let mut path = Path::try_from("/my/path").unwrap();
515    /// assert_eq!(path.segments()[1], "path");
516    /// ```
517    pub fn segments(&self) -> &[Segment<'path>] {
518        &self.segments
519    }
520
521    /// Returns the segments of the path mutably.
522    ///
523    /// Due to the required restriction that there must be at least one segment in a path, this
524    /// mutability only applies to the segments themselves, not the container.
525    ///
526    /// # Examples
527    ///
528    /// ```
529    /// use std::convert::TryFrom;
530    ///
531    /// use uriparse::{Path, Segment};
532    ///
533    /// let mut path = Path::try_from("/my/path").unwrap();
534    /// let mut segments = path.segments_mut();
535    /// segments[1] = Segment::try_from("test").unwrap();
536    ///
537    /// assert_eq!(path, "/my/test");
538    /// ```
539    pub fn segments_mut(&mut self) -> &mut [Segment<'path>] {
540        &mut self.segments
541    }
542
543    /// Sets whether the path is absolute (i.e. it starts with a `'/'`).
544    ///
545    /// # Examples
546    ///
547    /// ```
548    /// use std::convert::TryFrom;
549    ///
550    /// use uriparse::Path;
551    ///
552    /// let mut path = Path::try_from("/my/path").unwrap();
553    /// path.set_absolute(false);
554    /// assert_eq!(path, "my/path");
555    /// ```
556    pub fn set_absolute(&mut self, absolute: bool) {
557        self.absolute = absolute;
558    }
559
560    /// Returns a new path which is identical but has a lifetime tied to this path.
561    ///
562    /// This function will perform a memory allocation.
563    pub fn to_borrowed(&self) -> Path {
564        let segments = self.segments.iter().map(Segment::as_borrowed).collect();
565
566        Path {
567            absolute: self.absolute,
568            double_dot_segment_count: self.double_dot_segment_count,
569            leading_double_dot_segment_count: self.leading_double_dot_segment_count,
570            segments,
571            single_dot_segment_count: self.single_dot_segment_count,
572            unnormalized_count: self.unnormalized_count,
573        }
574    }
575}
576
577impl Display for Path<'_> {
578    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
579        if self.absolute {
580            formatter.write_char('/')?;
581        }
582
583        for (index, segment) in self.segments.iter().enumerate() {
584            formatter.write_str(segment.as_str())?;
585
586            if index < self.segments.len() - 1 {
587                formatter.write_char('/')?;
588            }
589        }
590
591        Ok(())
592    }
593}
594
595impl Hash for Path<'_> {
596    fn hash<H>(&self, state: &mut H)
597    where
598        H: Hasher,
599    {
600        self.segments.hash(state)
601    }
602}
603
604impl<'path> From<Path<'path>> for String {
605    fn from(value: Path<'path>) -> Self {
606        value.to_string()
607    }
608}
609
610impl PartialEq for Path<'_> {
611    fn eq(&self, other: &Path) -> bool {
612        self.segments == other.segments
613    }
614}
615
616impl PartialEq<[u8]> for Path<'_> {
617    fn eq(&self, mut other: &[u8]) -> bool {
618        if self.absolute {
619            match other.get(0) {
620                Some(&byte) => {
621                    if byte != b'/' {
622                        return false;
623                    }
624                }
625                None => return false,
626            }
627
628            other = &other[1..];
629        }
630
631        for (index, segment) in self.segments.iter().enumerate() {
632            let len = segment.as_str().len();
633
634            if other.len() < len || &other[..len] != segment {
635                return false;
636            }
637
638            other = &other[len..];
639
640            if index < self.segments.len() - 1 {
641                match other.get(0) {
642                    Some(&byte) => {
643                        if byte != b'/' {
644                            return false;
645                        }
646                    }
647                    None => return false,
648                }
649
650                other = &other[1..];
651            }
652        }
653
654        true
655    }
656}
657
658impl<'path> PartialEq<Path<'path>> for [u8] {
659    fn eq(&self, other: &Path<'path>) -> bool {
660        other == self
661    }
662}
663
664impl<'a> PartialEq<&'a [u8]> for Path<'_> {
665    fn eq(&self, other: &&'a [u8]) -> bool {
666        self == *other
667    }
668}
669
670impl<'a, 'path> PartialEq<Path<'path>> for &'a [u8] {
671    fn eq(&self, other: &Path<'path>) -> bool {
672        other == *self
673    }
674}
675
676impl PartialEq<str> for Path<'_> {
677    fn eq(&self, other: &str) -> bool {
678        self == other.as_bytes()
679    }
680}
681
682impl<'path> PartialEq<Path<'path>> for str {
683    fn eq(&self, other: &Path<'path>) -> bool {
684        other == self.as_bytes()
685    }
686}
687
688impl<'a> PartialEq<&'a str> for Path<'_> {
689    fn eq(&self, other: &&'a str) -> bool {
690        self == other.as_bytes()
691    }
692}
693
694impl<'a, 'path> PartialEq<Path<'path>> for &'a str {
695    fn eq(&self, other: &Path<'path>) -> bool {
696        other == self.as_bytes()
697    }
698}
699
700impl<'path> TryFrom<&'path [u8]> for Path<'path> {
701    type Error = PathError;
702
703    fn try_from(value: &'path [u8]) -> Result<Self, Self::Error> {
704        let (path, rest) = parse_path(value)?;
705
706        if rest.is_empty() {
707            Ok(path)
708        } else {
709            Err(PathError::InvalidCharacter)
710        }
711    }
712}
713
714impl<'path> TryFrom<&'path str> for Path<'path> {
715    type Error = PathError;
716
717    fn try_from(value: &'path str) -> Result<Self, Self::Error> {
718        Path::try_from(value.as_bytes())
719    }
720}
721
/// A single segment of a path.
///
/// Within a path, neighboring segments are delimited by `'/'`.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Segment<'segment> {
    /// Cached flag recording whether the segment is normalized.
    normalized: bool,

    /// The text of the segment, either borrowed from the source or owned.
    segment: Cow<'segment, str>,
}
734
735impl Segment<'_> {
736    /// Returns a new segment which is identical but has as lifetime tied to this segment.
737    pub fn as_borrowed(&self) -> Segment {
738        use self::Cow::*;
739
740        let segment = match &self.segment {
741            Borrowed(borrowed) => *borrowed,
742            Owned(owned) => owned.as_str(),
743        };
744
745        Segment {
746            normalized: self.normalized,
747            segment: Cow::Borrowed(segment),
748        }
749    }
750
751    /// Returns a `str` representation of the segment.
752    ///
753    /// # Examples
754    ///
755    /// ```
756    /// use std::convert::TryFrom;
757    ///
758    /// use uriparse::Segment;
759    ///
760    /// let segment = Segment::try_from("segment").unwrap();
761    /// assert_eq!(segment.as_str(), "segment");
762    /// ```
763    pub fn as_str(&self) -> &str {
764        &self.segment
765    }
766
767    /// Constructs a segment that is empty.
768    ///
769    /// # Examples
770    ///
771    /// ```
772    /// use uriparse::Segment;
773    ///
774    /// assert_eq!(Segment::empty(),  "");
775    /// ```
776    pub fn empty() -> Segment<'static> {
777        Segment {
778            normalized: true,
779            segment: Cow::from(""),
780        }
781    }
782
783    /// Converts the [`Segment`] into an owned copy.
784    ///
785    /// If you construct the segment from a source with a non-static lifetime, you may run into
786    /// lifetime problems due to the way the struct is designed. Calling this function will ensure
787    /// that the returned value has a static lifetime.
788    ///
789    /// This is different from just cloning. Cloning the segment will just copy the references, and
790    /// thus the lifetime will remain the same.
791    pub fn into_owned(self) -> Segment<'static> {
792        Segment {
793            normalized: self.normalized,
794            segment: Cow::from(self.segment.into_owned()),
795        }
796    }
797
798    /// Returns whether the segment is a dot segment, i.e., is `"."` or `".."`.
799    ///
800    /// # Examples
801    ///
802    /// ```
803    /// use std::convert::TryFrom;
804    ///
805    /// use uriparse::Segment;
806    ///
807    /// let segment = Segment::try_from("segment").unwrap();
808    /// assert!(!segment.is_dot_segment());
809    ///
810    /// let segment = Segment::try_from(".").unwrap();
811    /// assert!(segment.is_dot_segment());
812    ///
813    /// let segment = Segment::try_from("..").unwrap();
814    /// assert!(segment.is_dot_segment());
815    /// ```
816    pub fn is_dot_segment(&self) -> bool {
817        self == "." || self == ".."
818    }
819
820    /// Returns whether the segment is a dot segment, i.e., is `".."`.
821    ///
822    /// # Examples
823    ///
824    /// ```
825    /// use std::convert::TryFrom;
826    ///
827    /// use uriparse::Segment;
828    ///
829    /// let segment = Segment::try_from("segment").unwrap();
830    /// assert!(!segment.is_double_dot_segment());
831    ///
832    /// let segment = Segment::try_from(".").unwrap();
833    /// assert!(!segment.is_double_dot_segment());
834    ///
835    /// let segment = Segment::try_from("..").unwrap();
836    /// assert!(segment.is_double_dot_segment());
837    /// ```
838    pub fn is_double_dot_segment(&self) -> bool {
839        self == ".."
840    }
841
842    /// Returns whether the segment is normalized.
843    ///
844    /// A normalized segment will have no bytes that are in the unreserved character set
845    /// percent-encoded and all alphabetical characters in percent-encodings will be uppercase.
846    ///
847    /// # Examples
848    ///
849    /// ```
850    /// use std::convert::TryFrom;
851    ///
852    /// use uriparse::Segment;
853    ///
854    /// let segment = Segment::try_from("segment").unwrap();
855    /// assert!(segment.is_normalized());
856    ///
857    /// let mut segment = Segment::try_from("%ff%ff").unwrap();
858    /// assert!(!segment.is_normalized());
859    /// segment.normalize();
860    /// assert!(segment.is_normalized());
861    /// ```
862    pub fn is_normalized(&self) -> bool {
863        self.normalized
864    }
865
866    /// Returns whether the segment is a dot segment, i.e., is `"."`.
867    ///
868    /// # Examples
869    ///
870    /// ```
871    /// use std::convert::TryFrom;
872    ///
873    /// use uriparse::Segment;
874    ///
875    /// let segment = Segment::try_from("segment").unwrap();
876    /// assert!(!segment.is_single_dot_segment());
877    ///
878    /// let segment = Segment::try_from(".").unwrap();
879    /// assert!(segment.is_single_dot_segment());
880    ///
881    /// let segment = Segment::try_from("..").unwrap();
882    /// assert!(!segment.is_single_dot_segment());
883    /// ```
884    pub fn is_single_dot_segment(&self) -> bool {
885        self == "."
886    }
887
888    /// Normalizes the segment such that it will have no bytes that are in the unreserved character
889    /// set percent-encoded and all alphabetical characters in percent-encodings will be uppercase.
890    ///
891    /// If the segment is already normalized, the function will return immediately. Otherwise, if
892    /// the segment is not owned, this function will perform an allocation to clone it. The
893    /// normalization itself though, is done in-place with no extra memory allocations required.
894    ///
895    /// # Examples
896    ///
897    /// ```
898    /// use std::convert::TryFrom;
899    ///
900    /// use uriparse::Segment;
901    ///
902    /// let mut segment = Segment::try_from("segment").unwrap();
903    /// segment.normalize();
904    /// assert_eq!(segment, "segment");
905    ///
906    /// let mut segment = Segment::try_from("%ff%41").unwrap();
907    /// assert_eq!(segment, "%ff%41");
908    /// segment.normalize();
909    /// assert_eq!(segment, "%FFA");
910    /// ```
911    pub fn normalize(&mut self) {
912        if !self.normalized {
913            // Unsafe: Paths must be valid ASCII-US, so this is safe.
914            unsafe { normalize_string(&mut self.segment.to_mut(), true) };
915            self.normalized = true;
916        }
917    }
918}
919
920impl AsRef<[u8]> for Segment<'_> {
921    fn as_ref(&self) -> &[u8] {
922        self.segment.as_bytes()
923    }
924}
925
926impl AsRef<str> for Segment<'_> {
927    fn as_ref(&self) -> &str {
928        &self.segment
929    }
930}
931
932impl Deref for Segment<'_> {
933    type Target = str;
934
935    fn deref(&self) -> &Self::Target {
936        &self.segment
937    }
938}
939
940impl Display for Segment<'_> {
941    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
942        formatter.write_str(&self.segment)
943    }
944}
945
946impl Eq for Segment<'_> {}
947
948impl<'segment> From<Segment<'segment>> for String {
949    fn from(value: Segment<'segment>) -> Self {
950        value.to_string()
951    }
952}
953
954impl Hash for Segment<'_> {
955    fn hash<H>(&self, state: &mut H)
956    where
957        H: Hasher,
958    {
959        percent_encoded_hash(self.segment.as_bytes(), state, true);
960    }
961}
962
963impl PartialEq for Segment<'_> {
964    fn eq(&self, other: &Segment) -> bool {
965        *self == *other.as_bytes()
966    }
967}
968
969impl PartialEq<[u8]> for Segment<'_> {
970    fn eq(&self, other: &[u8]) -> bool {
971        percent_encoded_equality(self.segment.as_bytes(), other, true)
972    }
973}
974
975impl<'segment> PartialEq<Segment<'segment>> for [u8] {
976    fn eq(&self, other: &Segment<'segment>) -> bool {
977        other == self
978    }
979}
980
981impl<'a> PartialEq<&'a [u8]> for Segment<'_> {
982    fn eq(&self, other: &&'a [u8]) -> bool {
983        self == *other
984    }
985}
986
987impl<'a, 'segment> PartialEq<Segment<'segment>> for &'a [u8] {
988    fn eq(&self, other: &Segment<'segment>) -> bool {
989        other == *self
990    }
991}
992
993impl PartialEq<str> for Segment<'_> {
994    fn eq(&self, other: &str) -> bool {
995        self == other.as_bytes()
996    }
997}
998
999impl<'segment> PartialEq<Segment<'segment>> for str {
1000    fn eq(&self, other: &Segment<'segment>) -> bool {
1001        other == self.as_bytes()
1002    }
1003}
1004
1005impl<'a> PartialEq<&'a str> for Segment<'_> {
1006    fn eq(&self, other: &&'a str) -> bool {
1007        self == other.as_bytes()
1008    }
1009}
1010
1011impl<'a, 'segment> PartialEq<Segment<'segment>> for &'a str {
1012    fn eq(&self, other: &Segment<'segment>) -> bool {
1013        other == self.as_bytes()
1014    }
1015}
1016
1017impl<'segment> TryFrom<&'segment [u8]> for Segment<'segment> {
1018    type Error = PathError;
1019
1020    fn try_from(value: &'segment [u8]) -> Result<Self, Self::Error> {
1021        let mut bytes = value.iter();
1022        let mut normalized = true;
1023
1024        while let Some(&byte) = bytes.next() {
1025            match PATH_CHAR_MAP[byte as usize] {
1026                0 => return Err(PathError::InvalidCharacter),
1027                b'%' => {
1028                    match get_percent_encoded_value(bytes.next().cloned(), bytes.next().cloned()) {
1029                        Ok((hex_value, uppercase)) => {
1030                            if !uppercase || UNRESERVED_CHAR_MAP[hex_value as usize] != 0 {
1031                                normalized = false;
1032                            }
1033                        }
1034                        Err(_) => return Err(PathError::InvalidPercentEncoding),
1035                    }
1036                }
1037                _ => (),
1038            }
1039        }
1040
1041        // Unsafe: The loop above makes sure the byte string is valid ASCII-US.
1042        let segment = Segment {
1043            normalized,
1044            segment: Cow::Borrowed(unsafe { str::from_utf8_unchecked(value) }),
1045        };
1046        Ok(segment)
1047    }
1048}
1049
1050impl<'segment> TryFrom<&'segment str> for Segment<'segment> {
1051    type Error = PathError;
1052
1053    fn try_from(value: &'segment str) -> Result<Self, Self::Error> {
1054        Segment::try_from(value.as_bytes())
1055    }
1056}
1057
/// An error representing an invalid path.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum PathError {
    /// The path exceeded the maximum length allowed.
    ///
    /// Due to implementation reasons, segments are tracked with `u16` counters. `parse_path`
    /// returns this error when its segment counter would overflow, i.e. when the path has more
    /// than 2^16 - 1 (65535) segments.
    ExceededMaximumLength,

    /// The path contained an invalid character.
    InvalidCharacter,

    /// The path contained an invalid percent encoding (e.g. `"%ZZ"`).
    InvalidPercentEncoding,
}
1072
1073impl Display for PathError {
1074    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
1075        use self::PathError::*;
1076
1077        match self {
1078            ExceededMaximumLength => write!(formatter, "exceeded maximum path length"),
1079            InvalidCharacter => write!(formatter, "invalid path character"),
1080            InvalidPercentEncoding => write!(formatter, "invalid path percent encoding"),
1081        }
1082    }
1083}
1084
1085impl Error for PathError {}
1086
1087impl From<Infallible> for PathError {
1088    fn from(_: Infallible) -> Self {
1089        PathError::InvalidCharacter
1090    }
1091}
1092
1093/// Parses the path from the given byte string.
1094pub(crate) fn parse_path(value: &[u8]) -> Result<(Path, &[u8]), PathError> {
1095    struct SegmentInfo {
1096        absolute: bool,
1097        double_dot_segment_count: u16,
1098        index: u16,
1099        last_double_dot_segment: Option<u16>,
1100        leading_double_dot_segment_count: u16,
1101        normalized: bool,
1102        single_dot_segment_count: u16,
1103        unnormalized_count: u16,
1104    }
1105
1106    impl SegmentInfo {
1107        fn into_path<'path>(self, segments: Vec<Segment<'path>>) -> Path<'path> {
1108            Path {
1109                absolute: self.absolute,
1110                double_dot_segment_count: self.double_dot_segment_count,
1111                leading_double_dot_segment_count: self.leading_double_dot_segment_count,
1112                segments,
1113                single_dot_segment_count: self.single_dot_segment_count,
1114                unnormalized_count: self.unnormalized_count,
1115            }
1116        }
1117    }
1118
1119    #[allow(clippy::too_many_arguments)]
1120    fn new_segment<'segment>(
1121        segment: &'segment [u8],
1122        segment_info: &mut SegmentInfo,
1123    ) -> Segment<'segment> {
1124        if !segment_info.normalized {
1125            segment_info.unnormalized_count += 1;
1126        }
1127
1128        if segment == b"." {
1129            segment_info.single_dot_segment_count += 1;
1130        }
1131
1132        if segment == b".." {
1133            let index = segment_info.index - 1;
1134            segment_info.double_dot_segment_count += 1;
1135
1136            if index == 0 || segment_info.last_double_dot_segment == Some(index - 1) {
1137                segment_info.leading_double_dot_segment_count += 1;
1138                segment_info.last_double_dot_segment = Some(index);
1139            }
1140        }
1141
1142        // Unsafe: The loop above makes sure the byte string is valid ASCII-US.
1143        Segment {
1144            normalized: segment_info.normalized,
1145            segment: Cow::from(unsafe { str::from_utf8_unchecked(segment) }),
1146        }
1147    }
1148
1149    let (value, absolute) = if value.starts_with(b"/") {
1150        (&value[1..], true)
1151    } else {
1152        (value, false)
1153    };
1154
1155    let mut bytes = value.iter();
1156    let mut segment_info = SegmentInfo {
1157        absolute,
1158        double_dot_segment_count: 0,
1159        index: 1,
1160        last_double_dot_segment: None,
1161        leading_double_dot_segment_count: 0,
1162        normalized: true,
1163        single_dot_segment_count: 0,
1164        unnormalized_count: 0,
1165    };
1166    let mut segment_end_index = 0;
1167    let mut segment_start_index = 0;
1168
1169    // Set some moderate initial capacity. This seems to help with performance a bit.
1170    let mut segments = Vec::with_capacity(10);
1171
1172    while let Some(&byte) = bytes.next() {
1173        match PATH_CHAR_MAP[byte as usize] {
1174            0 if byte == b'?' || byte == b'#' => {
1175                let segment = new_segment(
1176                    &value[segment_start_index..segment_end_index],
1177                    &mut segment_info,
1178                );
1179                segments.push(segment);
1180                let path = segment_info.into_path(segments);
1181                return Ok((path, &value[segment_end_index..]));
1182            }
1183            0 if byte == b'/' => {
1184                let segment = new_segment(
1185                    &value[segment_start_index..segment_end_index],
1186                    &mut segment_info,
1187                );
1188                segments.push(segment);
1189                segment_end_index += 1;
1190                segment_start_index = segment_end_index;
1191                segment_info.index = segment_info
1192                    .index
1193                    .checked_add(1)
1194                    .ok_or(PathError::ExceededMaximumLength)?;
1195                segment_info.normalized = true;
1196            }
1197            0 => return Err(PathError::InvalidCharacter),
1198            b'%' => match get_percent_encoded_value(bytes.next().cloned(), bytes.next().cloned()) {
1199                Ok((hex_value, uppercase)) => {
1200                    if !uppercase || UNRESERVED_CHAR_MAP[hex_value as usize] != 0 {
1201                        segment_info.normalized = false;
1202                    }
1203
1204                    segment_end_index += 3;
1205                }
1206                Err(_) => return Err(PathError::InvalidPercentEncoding),
1207            },
1208            _ => segment_end_index += 1,
1209        }
1210    }
1211
1212    let segment = new_segment(&value[segment_start_index..], &mut segment_info);
1213    segments.push(segment);
1214    let path = segment_info.into_path(segments);
1215    Ok((path, b""))
1216}
1217
#[cfg(test)]
mod test {
    use super::*;

    /// Paths that differ only in how a character is percent-encoded compare equal.
    #[test]
    fn test_path_equals() {
        let literal = Path::try_from("segment").unwrap();
        let percent_encoded = Path::try_from("s%65gment").unwrap();

        assert_eq!(literal, percent_encoded);
    }

    /// Normalization yields the expected text and leaves the dot-segment counters consistent.
    #[test]
    fn test_path_normalize() {
        /// Normalizes `value` and checks the rendered path against `expected`, deriving the
        /// expected dot-segment counters from `expected` itself.
        fn test_case(value: &str, expected: &str, as_reference: bool) {
            let mut path = Path::try_from(value).unwrap();
            path.normalize(as_reference);

            let expected_single_dots = u16::from(expected.starts_with("./"));
            let expected_double_dots = expected
                .split('/')
                .filter(|segment| *segment == "..")
                .count() as u16;

            assert!(!path.segments().is_empty());
            assert!(path.is_normalized(as_reference));
            assert_eq!(path.single_dot_segment_count, expected_single_dots);
            assert_eq!(path.double_dot_segment_count, expected_double_dots);
            assert_eq!(path.leading_double_dot_segment_count, expected_double_dots);
            assert_eq!(path.to_string(), expected);
        }

        // (input, expected) pairs normalized with `as_reference == true`.
        const AS_REFERENCE: &[(&str, &str)] = &[
            ("", ""),
            (".", ""),
            ("..", ".."),
            ("../", "../"),
            ("/.", "/"),
            ("./././././././.", ""),
            ("././././././././", ""),
            ("/..", "/"),
            ("../..", "../.."),
            ("../a/../..", "../.."),
            ("a", "a"),
            ("a/..", ""),
            ("a/../", ""),
            ("a/../..", ".."),
            ("./a:b", "./a:b"),
            ("./a:b/..", ""),
            ("./a:b/../c:d", "./c:d"),
            ("./../a:b", "../a:b"),
            ("../a/../", "../"),
            ("../../.././.././../../../.", "../../../../../../.."),
            ("a/.././a:b", "./a:b"),
        ];

        // (input, expected) pairs normalized with `as_reference == false`.
        const NOT_AS_REFERENCE: &[(&str, &str)] = &[
            ("", ""),
            (".", ""),
            ("..", ""),
            ("../", ""),
            ("/.", "/"),
            ("/..", "/"),
            ("../../.././.././../../../.", ""),
            ("a/../..", "/"),
            ("a/../../", "/"),
            ("/a/../../../../", "/"),
            ("/a/./././././././c", "/a/c"),
            ("/a/.", "/a/"),
            ("/a/./", "/a/"),
            ("/a/..", "/"),
            ("/a/b/./..", "/a/"),
            ("/a/b/./../", "/a/"),
            ("/a/b/c/./../../g", "/a/g"),
            ("mid/content=5/../6", "mid/6"),
        ];

        for &(value, expected) in AS_REFERENCE {
            test_case(value, expected, true);
        }

        for &(value, expected) in NOT_AS_REFERENCE {
            test_case(value, expected, false);
        }

        // Percent-encoding normalization gives the same text in both modes.
        for &as_reference in &[true, false] {
            test_case(
                "this/is/a/t%65st/path/%ff",
                "this/is/a/test/path/%FF",
                as_reference,
            );
        }
    }

    /// Valid paths round-trip unchanged and invalid input maps to the matching error.
    #[test]
    fn test_path_parse() {
        use self::PathError::*;

        for &valid in &["", "/", "/tHiS/iS/a/PaTh", "%ff%ff%ff%41"] {
            assert_eq!(Path::try_from(valid).unwrap(), valid);
        }

        let longest_accepted = "/".repeat(65535);
        assert!(Path::try_from(longest_accepted.as_str()).is_ok());

        let too_long = "/".repeat(65536);
        assert_eq!(
            Path::try_from(too_long.as_str()),
            Err(ExceededMaximumLength)
        );

        let rejected = [
            (" ", InvalidCharacter),
            ("#", InvalidCharacter),
            ("%", InvalidPercentEncoding),
            ("%f", InvalidPercentEncoding),
            ("%zz", InvalidPercentEncoding),
        ];

        for &(input, error) in &rejected {
            assert_eq!(Path::try_from(input), Err(error));
        }
    }

    /// Removing dot segments leaves no `.`/`..` behind and produces the expected text.
    #[test]
    fn test_path_remove_dot_segments() {
        /// Removes the dot segments of `value` and checks the result against `expected`.
        fn test_case(value: &str, expected: &str) {
            let mut path = Path::try_from(value).unwrap();
            path.remove_dot_segments();

            assert!(!path.segments().is_empty());
            assert_eq!(path.single_dot_segment_count, 0);
            assert_eq!(path.double_dot_segment_count, 0);
            assert_eq!(path.leading_double_dot_segment_count, 0);
            assert_eq!(path.to_string(), expected);
        }

        const CASES: &[(&str, &str)] = &[
            ("", ""),
            (".", ""),
            ("..", ""),
            ("../", ""),
            ("/.", "/"),
            ("/..", "/"),
            ("../../.././.././../../../.", ""),
            ("a/../..", "/"),
            ("a/../../", "/"),
            ("/a/../../../..", "/"),
            ("/a/../../../../", "/"),
            ("/a/./././././././c", "/a/c"),
            ("/a/.", "/a/"),
            ("/a/./", "/a/"),
            ("/a/..", "/"),
            ("/a/b/./..", "/a/"),
            ("/a/b/./../", "/a/"),
            ("/a/b/c/./../../g", "/a/g"),
            ("mid/content=5/../6", "mid/6"),
        ];

        for &(value, expected) in CASES {
            test_case(value, expected);
        }
    }

    /// Segment normalization uppercases hex digits and decodes unreserved characters.
    #[test]
    fn test_segment_normalize() {
        /// Normalizes `value` as a segment and compares it with `expected`.
        fn test_case(value: &str, expected: &str) {
            let mut segment = Segment::try_from(value).unwrap();
            segment.normalize();
            assert_eq!(segment, expected);
        }

        for &(value, expected) in &[("", ""), ("%ff", "%FF"), ("%41", "A")] {
            test_case(value, expected);
        }
    }

    /// Valid segments round-trip unchanged and invalid input maps to the matching error.
    #[test]
    fn test_segment_parse() {
        use self::PathError::*;

        for &valid in &["", "segment", "sEgMeNt", "%ff%ff%ff%41"] {
            assert_eq!(Segment::try_from(valid).unwrap(), valid);
        }

        let rejected = [
            (" ", InvalidCharacter),
            ("/", InvalidCharacter),
            ("%", InvalidPercentEncoding),
            ("%f", InvalidPercentEncoding),
            ("%zz", InvalidPercentEncoding),
        ];

        for &(input, error) in &rejected {
            assert_eq!(Segment::try_from(input), Err(error));
        }
    }
}