iri_string/percent_encode/decode.rs
1//! Decoders for percent-encoding.
2
3use core::fmt;
4
5#[cfg(feature = "alloc")]
6use alloc::borrow::Cow;
7#[cfg(feature = "alloc")]
8use alloc::string::{FromUtf8Error, String};
9
10use crate::parser::str::strip_decode_xdigits2;
11
/// Returns the result of the [`percent-decode`] algorithm in the WHATWG URL Standard.
///
/// This function only wraps `bytes` in a [`PercentDecodedWhatwgBytes`]; the
/// returned value borrows the input, and nothing is decoded until one of
/// its decoding methods is called.
///
/// [`percent-decode`]: https://url.spec.whatwg.org/#percent-decode
///
/// # Examples
///
/// ```
/// use iri_string::percent_encode::decode::decode_whatwg_bytes;
///
/// let decoded = decode_whatwg_bytes(b"hello%20world");
///
/// assert_eq!(decoded.not_yet_decoded(), &b"hello%20world"[..]);
///
/// // This requires `alloc` feature since
/// // `into_bytes()` returns `Cow<'_, [u8]>`.
/// # #[cfg(feature = "alloc")]
/// assert_eq!(decoded.into_bytes(), &b"hello world"[..]);
/// ```
#[inline]
#[must_use]
pub fn decode_whatwg_bytes(bytes: &[u8]) -> PercentDecodedWhatwgBytes<'_> {
    PercentDecodedWhatwgBytes::from_raw(bytes)
}
35
/// A percent-decoded string based on the [`percent-decode`] algorithm of the
/// WHATWG URL standard.
///
/// The value only borrows the raw input bytes, so it is cheap to copy.
///
/// Note that this type does not guarantee that the string is a valid
/// percent-encoded string. The raw string may contain stray percent
/// characters, or the characters following a percent character may be
/// invalid as hexadecimal digits.
///
/// Note that comparisons and hashing via std traits (such as `Eq` and `Hash`)
/// will use the raw value, not the content after decoding.
///
/// [`percent-decode`]: https://url.spec.whatwg.org/#percent-decode
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PercentDecodedWhatwgBytes<'a> {
    /// Not-yet-decoded string.
    not_yet_decoded: &'a [u8],
}
51
52impl<'a> PercentDecodedWhatwgBytes<'a> {
53 /// Creates a `PercentDecodedWhatwgBytes` from the possibly non-decoded raw bytes.
54 #[inline]
55 #[must_use]
56 fn from_raw(not_yet_decoded: &'a [u8]) -> Self {
57 Self { not_yet_decoded }
58 }
59
60 /// Returns the not-yet-decoded input bytes.
61 ///
62 /// # Examples
63 ///
64 /// ```
65 /// use iri_string::percent_encode::decode::decode_whatwg_bytes;
66 ///
67 /// let decoded = decode_whatwg_bytes(b"hello%20world");
68 ///
69 /// assert_eq!(decoded.not_yet_decoded(), &b"hello%20world"[..]);
70 /// ```
71 #[inline]
72 #[must_use]
73 pub fn not_yet_decoded(&self) -> &'a [u8] {
74 self.not_yet_decoded
75 }
76
77 /// Decodes the bytes as much as possible.
78 ///
79 /// If the string contains a decodable percent-encoded triplet, returns
80 /// a tuple of:
81 ///
82 /// 1. the length of the prefix that contains no percent-encoded triplets,
83 /// 1. the first decoded byte, and
84 /// 1. the suffix after the decoded percent-encoded triplet.
85 #[must_use]
86 fn try_non_allocating_decode(&self) -> Option<(usize, u8, &'a [u8])> {
87 let mut len_before_pct;
88 let mut rest = self.not_yet_decoded;
89
90 while !rest.is_empty() {
91 #[cfg(feature = "memchr")]
92 let pct_pos = memchr::memchr(b'%', rest);
93 #[cfg(not(feature = "memchr"))]
94 let pct_pos = rest.iter().position(|&b| b == b'%');
95
96 let after_pct;
97 (len_before_pct, after_pct) = match pct_pos {
98 None => return None,
99 Some(pos) => (pos, &rest[(pos + 1)..]),
100 };
101
102 let decoded;
103 (decoded, rest) = strip_decode_xdigits2(after_pct);
104 if let Some(decoded) = decoded {
105 return Some((len_before_pct, decoded, rest));
106 }
107 rest = after_pct;
108 }
109
110 None
111 }
112
113 /// Returns the decoded bytes as a slice if no memory allocation is needed.
114 ///
115 /// # Examples
116 ///
117 /// ```
118 /// use iri_string::percent_encode::decode::decode_whatwg_bytes;
119 ///
120 /// let no_alloc = decode_whatwg_bytes(b"99% unsafe");
121 /// assert_eq!(no_alloc.to_bytes(), Some(&b"99% unsafe"[..]));
122 ///
123 /// let alloc_needed = decode_whatwg_bytes(b"hello%20world");
124 /// assert_eq!(alloc_needed.to_bytes(), None);
125 /// ```
126 #[inline]
127 #[must_use]
128 pub fn to_bytes(&self) -> Option<&'a [u8]> {
129 match self.try_non_allocating_decode() {
130 None => Some(self.not_yet_decoded),
131 _ => None,
132 }
133 }
134
135 /// Decodes the bytes, based on [`percent-decode`] algorithm of the WHATWG URL standard.
136 ///
137 /// # Examples
138 ///
139 /// ```
140 /// use iri_string::percent_encode::decode::decode_whatwg_bytes;
141 ///
142 /// let decoded = decode_whatwg_bytes(b"hello%20world");
143 /// assert_eq!(decoded.into_bytes(), &b"hello world"[..]);
144 /// ```
145 ///
146 /// [`percent-decode`]: https://url.spec.whatwg.org/#percent-decode
147 #[cfg(feature = "alloc")]
148 #[inline]
149 #[must_use]
150 pub fn into_bytes(&self) -> Cow<'a, [u8]> {
151 use crate::parser::str::find_split_hole;
152
153 let (mut result, mut rest) = match self.try_non_allocating_decode() {
154 Some((prefix_len, decoded, rest)) => {
155 let mut prefix = alloc::vec::Vec::from(&self.not_yet_decoded[..prefix_len]);
156 prefix.push(decoded);
157 (prefix, rest)
158 }
159 None => return Cow::Borrowed(self.not_yet_decoded),
160 };
161
162 while !rest.is_empty() {
163 let after_pct = if let Some((no_pct, after_pct)) = find_split_hole(rest, b'%') {
164 result.extend(no_pct);
165 after_pct
166 } else {
167 result.extend(core::mem::take(&mut rest));
168 break;
169 };
170
171 let decoded;
172 (decoded, rest) = strip_decode_xdigits2(after_pct);
173 result.extend(decoded);
174 }
175
176 Cow::Owned(result)
177 }
178
179 /// Decodes the bytes into a string, based on [`percent-decode`] algorithm
180 /// of the WHATWG URL standard.
181 ///
182 /// # Examples
183 ///
184 /// ```
185 /// use iri_string::percent_encode::decode::decode_whatwg_bytes;
186 ///
187 /// let decoded = decode_whatwg_bytes(b"hello%20world");
188 /// assert_eq!(
189 /// decoded.into_string(),
190 /// Ok("hello world".to_owned())
191 /// );
192 /// ```
193 ///
194 /// [`percent-decode`]: https://url.spec.whatwg.org/#percent-decode
195 #[cfg(feature = "alloc")]
196 #[inline]
197 pub fn into_string(&self) -> Result<String, FromUtf8Error> {
198 String::from_utf8(self.into_bytes().into_owned())
199 }
200
201 /// Returns an iterator of decoded fragments.
202 ///
203 /// # Examples
204 ///
205 /// ```
206 /// use iri_string::percent_encode::decode::{
207 /// decode_whatwg_bytes, DecodedFragment,
208 /// };
209 ///
210 /// let mut i = decode_whatwg_bytes(b"100% hello%20world")
211 /// .bytes_fragments();
212 ///
213 /// assert_eq!(i.next(), Some(DecodedFragment::Direct(b"100")));
214 /// assert_eq!(i.next(), Some(DecodedFragment::StrayPercent));
215 /// assert_eq!(i.next(), Some(DecodedFragment::Direct(b" hello")));
216 /// assert_eq!(i.next(), Some(DecodedFragment::DecodedByte(b' ')));
217 /// assert_eq!(i.next(), Some(DecodedFragment::Direct(b"world")));
218 /// assert_eq!(i.next(), None);
219 /// ```
220 #[inline]
221 #[must_use]
222 pub fn bytes_fragments(&self) -> PercentDecodedBytesFragments<'a> {
223 PercentDecodedBytesFragments {
224 rest: self.not_yet_decoded,
225 }
226 }
227}
228
229impl fmt::Debug for PercentDecodedWhatwgBytes<'_> {
230 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
231 let mut f = f.debug_struct("PercentDecodedWhatwgBytes");
232 // Print as a string if possible.
233 match core::str::from_utf8(self.not_yet_decoded) {
234 Ok(s) => f.field("not_yet_decoded", &s),
235 Err(_) => f.field("not_yet_decoded", &self.not_yet_decoded),
236 };
237 f.finish()
238 }
239}
240
/// Fragments in a percent-decodable byte sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecodedFragment<'a> {
    /// Bytes that contain neither percent characters nor percent-encoded
    /// triplets; they are borrowed from the raw input as is.
    Direct(&'a [u8]),
    /// A decoded byte from a percent-encoded triplet.
    DecodedByte(u8),
    /// A percent character that does not form a valid percent-encoded triplet.
    StrayPercent,
}
251
/// An iterator of fragments in a percent-decodable byte sequence.
///
/// Each item is a [`DecodedFragment`] borrowed from, or decoded out of,
/// the raw input.
//
// NOTE: Do not implement `Copy` since an iterator type with `Copy` is a foot-gun.
#[derive(Clone)]
pub struct PercentDecodedBytesFragments<'a> {
    /// Remaining (not-yet-decoded) bytes that have not been yielded yet.
    rest: &'a [u8],
}
260
261impl fmt::Debug for PercentDecodedBytesFragments<'_> {
262 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263 let mut f = f.debug_struct("PercentDecodedBytesFragments");
264 // Print as a string if possible.
265 match core::str::from_utf8(self.rest) {
266 Ok(s) => f.field("rest", &s),
267 Err(_) => f.field("rest", &self.rest),
268 };
269 f.finish()
270 }
271}
272
impl<'a> PercentDecodedBytesFragments<'a> {
    /// Returns the remaining not-yet-decoded bytes.
    ///
    /// These are the raw bytes that the iterator has not consumed yet; the
    /// returned slice borrows from the original input, not from the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use iri_string::percent_encode::decode::{
    ///     decode_whatwg_bytes, DecodedFragment,
    /// };
    ///
    /// let mut i = decode_whatwg_bytes(b"hello%20world")
    ///     .bytes_fragments();
    ///
    /// assert_eq!(i.next(), Some(DecodedFragment::Direct(b"hello")));
    /// assert_eq!(i.not_yet_decoded(), &b"%20world"[..]);
    /// ```
    #[inline]
    #[must_use]
    pub fn not_yet_decoded(&self) -> &'a [u8] {
        self.rest
    }
}
295
296impl<'a> Iterator for PercentDecodedBytesFragments<'a> {
297 type Item = DecodedFragment<'a>;
298
299 fn next(&mut self) -> Option<Self::Item> {
300 let mut rest = self.rest;
301
302 match rest {
303 [] => None,
304 [b'%', after_pct @ ..] => {
305 let decoded;
306 (decoded, rest) = strip_decode_xdigits2(after_pct);
307 if let Some(decoded) = decoded {
308 self.rest = rest;
309 Some(DecodedFragment::DecodedByte(decoded))
310 } else {
311 self.rest = after_pct;
312 Some(DecodedFragment::StrayPercent)
313 }
314 }
315 [_, after_first @ ..] => {
316 #[cfg(feature = "memchr")]
317 let pct_pos_minus_one = memchr::memchr(b'%', after_first);
318 #[cfg(not(feature = "memchr"))]
319 let pct_pos_minus_one = after_first.iter().position(|&b| b == b'%');
320
321 let before_pct;
322 (before_pct, self.rest) = match pct_pos_minus_one {
323 None => (rest, &rest[rest.len()..]),
324 Some(pct_pos_minus_one) => rest.split_at(pct_pos_minus_one + 1),
325 };
326 Some(DecodedFragment::Direct(before_pct))
327 }
328 }
329 }
330}