Skip to main content

iri_string/percent_encode/
decode.rs

1//! Decoders for percent-encoding.
2
3use core::fmt;
4
5#[cfg(feature = "alloc")]
6use alloc::borrow::Cow;
7#[cfg(feature = "alloc")]
8use alloc::string::{FromUtf8Error, String};
9
10use crate::parser::str::strip_decode_xdigits2;
11
12/// Returns the result of [`percent-decode`] algorithm in the WHATWG URL Standard.
13///
14/// [`percent-decode`]: https://url.spec.whatwg.org/#percent-decode
15///
16/// # Examples
17///
18/// ```
19/// use iri_string::percent_encode::decode::decode_whatwg_bytes;
20///
21/// let decoded = decode_whatwg_bytes(b"hello%20world");
22///
23/// assert_eq!(decoded.not_yet_decoded(), &b"hello%20world"[..]);
24///
25/// // This requires `alloc` feature since
26/// // `into_bytes()` returns `Cow<'_, [u8]>`.
27/// # #[cfg(feature = "alloc")]
28/// assert_eq!(decoded.into_bytes(), &b"hello world"[..]);
29/// ```
30#[inline]
31#[must_use]
32pub fn decode_whatwg_bytes(bytes: &[u8]) -> PercentDecodedWhatwgBytes<'_> {
33    PercentDecodedWhatwgBytes::from_raw(bytes)
34}
35
/// A view over bytes that can be percent-decoded with the [`percent-decode`]
/// algorithm of the WHATWG URL standard.
///
/// The wrapped bytes are not validated: the raw input may contain a stray
/// percent character, or a percent character followed by bytes that are not
/// hexadecimal digits.
///
/// Comparisons and hashing via std traits (such as `Eq`, `Ord` and `Hash`)
/// operate on the raw value, not on the content after decoding.
///
/// [`percent-decode`]: https://url.spec.whatwg.org/#percent-decode
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PercentDecodedWhatwgBytes<'a> {
    /// Raw input bytes, exactly as given (nothing is decoded eagerly).
    not_yet_decoded: &'a [u8],
}
51
52impl<'a> PercentDecodedWhatwgBytes<'a> {
53    /// Creates a `PercentDecodedWhatwgBytes` from the possibly non-decoded raw bytes.
54    #[inline]
55    #[must_use]
56    fn from_raw(not_yet_decoded: &'a [u8]) -> Self {
57        Self { not_yet_decoded }
58    }
59
60    /// Returns the not-yet-decoded input bytes.
61    ///
62    /// # Examples
63    ///
64    /// ```
65    /// use iri_string::percent_encode::decode::decode_whatwg_bytes;
66    ///
67    /// let decoded = decode_whatwg_bytes(b"hello%20world");
68    ///
69    /// assert_eq!(decoded.not_yet_decoded(), &b"hello%20world"[..]);
70    /// ```
71    #[inline]
72    #[must_use]
73    pub fn not_yet_decoded(&self) -> &'a [u8] {
74        self.not_yet_decoded
75    }
76
77    /// Decodes the bytes as much as possible.
78    ///
79    /// If the string contains a decodable percent-encoded triplet, returns
80    /// a tuple of:
81    ///
82    /// 1. the length of the prefix that contains no percent-encoded triplets,
83    /// 1. the first decoded byte, and
84    /// 1. the suffix after the decoded percent-encoded triplet.
85    #[must_use]
86    fn try_non_allocating_decode(&self) -> Option<(usize, u8, &'a [u8])> {
87        let mut len_before_pct;
88        let mut rest = self.not_yet_decoded;
89
90        while !rest.is_empty() {
91            #[cfg(feature = "memchr")]
92            let pct_pos = memchr::memchr(b'%', rest);
93            #[cfg(not(feature = "memchr"))]
94            let pct_pos = rest.iter().position(|&b| b == b'%');
95
96            let after_pct;
97            (len_before_pct, after_pct) = match pct_pos {
98                None => return None,
99                Some(pos) => (pos, &rest[(pos + 1)..]),
100            };
101
102            let decoded;
103            (decoded, rest) = strip_decode_xdigits2(after_pct);
104            if let Some(decoded) = decoded {
105                return Some((len_before_pct, decoded, rest));
106            }
107            rest = after_pct;
108        }
109
110        None
111    }
112
113    /// Returns the decoded bytes as a slice if no memory allocation is needed.
114    ///
115    /// # Examples
116    ///
117    /// ```
118    /// use iri_string::percent_encode::decode::decode_whatwg_bytes;
119    ///
120    /// let no_alloc = decode_whatwg_bytes(b"99% unsafe");
121    /// assert_eq!(no_alloc.to_bytes(), Some(&b"99% unsafe"[..]));
122    ///
123    /// let alloc_needed = decode_whatwg_bytes(b"hello%20world");
124    /// assert_eq!(alloc_needed.to_bytes(), None);
125    /// ```
126    #[inline]
127    #[must_use]
128    pub fn to_bytes(&self) -> Option<&'a [u8]> {
129        match self.try_non_allocating_decode() {
130            None => Some(self.not_yet_decoded),
131            _ => None,
132        }
133    }
134
135    /// Decodes the bytes, based on [`percent-decode`] algorithm of the WHATWG URL standard.
136    ///
137    /// # Examples
138    ///
139    /// ```
140    /// use iri_string::percent_encode::decode::decode_whatwg_bytes;
141    ///
142    /// let decoded = decode_whatwg_bytes(b"hello%20world");
143    /// assert_eq!(decoded.into_bytes(), &b"hello world"[..]);
144    /// ```
145    ///
146    /// [`percent-decode`]: https://url.spec.whatwg.org/#percent-decode
147    #[cfg(feature = "alloc")]
148    #[inline]
149    #[must_use]
150    pub fn into_bytes(&self) -> Cow<'a, [u8]> {
151        use crate::parser::str::find_split_hole;
152
153        let (mut result, mut rest) = match self.try_non_allocating_decode() {
154            Some((prefix_len, decoded, rest)) => {
155                let mut prefix = alloc::vec::Vec::from(&self.not_yet_decoded[..prefix_len]);
156                prefix.push(decoded);
157                (prefix, rest)
158            }
159            None => return Cow::Borrowed(self.not_yet_decoded),
160        };
161
162        while !rest.is_empty() {
163            let after_pct = if let Some((no_pct, after_pct)) = find_split_hole(rest, b'%') {
164                result.extend(no_pct);
165                after_pct
166            } else {
167                result.extend(core::mem::take(&mut rest));
168                break;
169            };
170
171            let decoded;
172            (decoded, rest) = strip_decode_xdigits2(after_pct);
173            result.extend(decoded);
174        }
175
176        Cow::Owned(result)
177    }
178
179    /// Decodes the bytes into a string, based on [`percent-decode`] algorithm
180    /// of the WHATWG URL standard.
181    ///
182    /// # Examples
183    ///
184    /// ```
185    /// use iri_string::percent_encode::decode::decode_whatwg_bytes;
186    ///
187    /// let decoded = decode_whatwg_bytes(b"hello%20world");
188    /// assert_eq!(
189    ///     decoded.into_string(),
190    ///     Ok("hello world".to_owned())
191    /// );
192    /// ```
193    ///
194    /// [`percent-decode`]: https://url.spec.whatwg.org/#percent-decode
195    #[cfg(feature = "alloc")]
196    #[inline]
197    pub fn into_string(&self) -> Result<String, FromUtf8Error> {
198        String::from_utf8(self.into_bytes().into_owned())
199    }
200
201    /// Returns an iterator of decoded fragments.
202    ///
203    /// # Examples
204    ///
205    /// ```
206    /// use iri_string::percent_encode::decode::{
207    ///     decode_whatwg_bytes, DecodedFragment,
208    /// };
209    ///
210    /// let mut i = decode_whatwg_bytes(b"100% hello%20world")
211    ///     .bytes_fragments();
212    ///
213    /// assert_eq!(i.next(), Some(DecodedFragment::Direct(b"100")));
214    /// assert_eq!(i.next(), Some(DecodedFragment::StrayPercent));
215    /// assert_eq!(i.next(), Some(DecodedFragment::Direct(b" hello")));
216    /// assert_eq!(i.next(), Some(DecodedFragment::DecodedByte(b' ')));
217    /// assert_eq!(i.next(), Some(DecodedFragment::Direct(b"world")));
218    /// assert_eq!(i.next(), None);
219    /// ```
220    #[inline]
221    #[must_use]
222    pub fn bytes_fragments(&self) -> PercentDecodedBytesFragments<'a> {
223        PercentDecodedBytesFragments {
224            rest: self.not_yet_decoded,
225        }
226    }
227}
228
229impl fmt::Debug for PercentDecodedWhatwgBytes<'_> {
230    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
231        let mut f = f.debug_struct("PercentDecodedWhatwgBytes");
232        // Print as a string if possible.
233        match core::str::from_utf8(self.not_yet_decoded) {
234            Ok(s) => f.field("not_yet_decoded", &s),
235            Err(_) => f.field("not_yet_decoded", &self.not_yet_decoded),
236        };
237        f.finish()
238    }
239}
240
/// A single piece of a percent-decodable byte sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecodedFragment<'a> {
    /// A run of bytes taken from the input unchanged; it contains neither
    /// percent characters nor percent-encoded triplets.
    Direct(&'a [u8]),
    /// The byte obtained by decoding one percent-encoded triplet.
    DecodedByte(u8),
    /// A percent character that is not the start of a valid percent-encoded
    /// triplet.
    StrayPercent,
}
251
/// An iterator that walks a percent-decodable byte sequence fragment by fragment.
//
// NOTE: Do not implement `Copy` since an iterator type with `Copy` is a foot-gun.
#[derive(Clone)]
pub struct PercentDecodedBytesFragments<'a> {
    /// Input bytes that have not been yielded yet.
    rest: &'a [u8],
}
260
261impl fmt::Debug for PercentDecodedBytesFragments<'_> {
262    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263        let mut f = f.debug_struct("PercentDecodedBytesFragments");
264        // Print as a string if possible.
265        match core::str::from_utf8(self.rest) {
266            Ok(s) => f.field("rest", &s),
267            Err(_) => f.field("rest", &self.rest),
268        };
269        f.finish()
270    }
271}
272
273impl<'a> PercentDecodedBytesFragments<'a> {
274    /// Returns the remaining not-yet-decoded bytes.
275    ///
276    /// # Examples
277    ///
278    /// ```
279    /// use iri_string::percent_encode::decode::{
280    ///     decode_whatwg_bytes, DecodedFragment,
281    /// };
282    ///
283    /// let mut i = decode_whatwg_bytes(b"hello%20world")
284    ///     .bytes_fragments();
285    ///
286    /// assert_eq!(i.next(), Some(DecodedFragment::Direct(b"hello")));
287    /// assert_eq!(i.not_yet_decoded(), &b"%20world"[..]);
288    /// ```
289    #[inline]
290    #[must_use]
291    pub fn not_yet_decoded(&self) -> &'a [u8] {
292        self.rest
293    }
294}
295
296impl<'a> Iterator for PercentDecodedBytesFragments<'a> {
297    type Item = DecodedFragment<'a>;
298
299    fn next(&mut self) -> Option<Self::Item> {
300        let mut rest = self.rest;
301
302        match rest {
303            [] => None,
304            [b'%', after_pct @ ..] => {
305                let decoded;
306                (decoded, rest) = strip_decode_xdigits2(after_pct);
307                if let Some(decoded) = decoded {
308                    self.rest = rest;
309                    Some(DecodedFragment::DecodedByte(decoded))
310                } else {
311                    self.rest = after_pct;
312                    Some(DecodedFragment::StrayPercent)
313                }
314            }
315            [_, after_first @ ..] => {
316                #[cfg(feature = "memchr")]
317                let pct_pos_minus_one = memchr::memchr(b'%', after_first);
318                #[cfg(not(feature = "memchr"))]
319                let pct_pos_minus_one = after_first.iter().position(|&b| b == b'%');
320
321                let before_pct;
322                (before_pct, self.rest) = match pct_pos_minus_one {
323                    None => (rest, &rest[rest.len()..]),
324                    Some(pct_pos_minus_one) => rest.split_at(pct_pos_minus_one + 1),
325                };
326                Some(DecodedFragment::Direct(before_pct))
327            }
328        }
329    }
330}