Skip to main content

iri_string/
percent_encode.rs

//! Percent encoding.
//!
//! Functions in this module are intended for manual manipulation of URI components.
//! If you need to convert a Unicode IRI into an ASCII-only URI, check the `encode_to_uri`
//! methods of IRI string types (such as [`IriStr::encode_to_uri`]).
//!
//! [`IriStr::encode_to_uri`]: `crate::types::IriStr::encode_to_uri`
8
9pub mod decode;
10
11use core::fmt::{self, Write as _};
12use core::marker::PhantomData;
13
14use crate::parser::char;
15use crate::spec::{IriSpec, Spec, UriSpec};
16
/// A proxy to percent-encode a string as a part of a URI.
///
/// This is an alias of [`PercentEncoded`] with the spec fixed to [`UriSpec`].
pub type PercentEncodedForUri<T> = PercentEncoded<T, UriSpec>;

/// A proxy to percent-encode a string as a part of an IRI.
///
/// This is an alias of [`PercentEncoded`] with the spec fixed to [`IriSpec`].
pub type PercentEncodedForIri<T> = PercentEncoded<T, IriSpec>;
22
/// Context for percent encoding.
///
/// The context decides which characters are written as is; every other
/// character is percent-encoded.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
enum Context {
    /// Encode the string as a reg-name (usually called a "hostname").
    RegName,
    /// Encode the string as a user name or a password (inside the `userinfo` component).
    ///
    /// A colon (`:`) will be always encoded.
    UserOrPassword,
    /// Encode the string as a path segment.
    ///
    /// A slash (`/`) will be encoded to `%2F`.
    PathSegment,
    /// Encode the string as path segments joined with `/`.
    ///
    /// A slash (`/`) will be used as is.
    Path,
    /// Encode the string as a query string (without the `?` prefix).
    ///
    /// A slash (`/`) will be used as is.
    Query,
    /// Encode the string as a fragment string (without the `#` prefix).
    ///
    /// A slash (`/`) will be used as is.
    Fragment,
    /// Encode all characters except for `unreserved` characters.
    Unreserve,
    /// Encode characters only if they cannot appear anywhere in an IRI reference.
    ///
    /// `%` character will be always encoded.
    Character,
}
50
/// A proxy to percent-encode a string.
///
/// The raw value is stored as is; the encoding is applied when this proxy is
/// formatted through its [`fmt::Display`] implementation.
///
/// Type aliases [`PercentEncodedForIri`] and [`PercentEncodedForUri`] are provided.
/// You can use them to make the expression simpler, for example write
/// `PercentEncodedForUri::from_path(foo)` instead of
/// `PercentEncoded::<_, UriSpec>::from_path(foo)`.
#[derive(Debug, Clone, Copy)]
pub struct PercentEncoded<T, S> {
    /// Source string context.
    context: Context,
    /// Raw string before being encoded.
    raw: T,
    /// Spec.
    ///
    /// Only a marker: no value of `S` is stored.
    _spec: PhantomData<fn() -> S>,
}
66
67impl<T: fmt::Display, S: Spec> PercentEncoded<T, S> {
68    /// Creates an encoded string from a raw reg-name (i.e. hostname or domain).
69    ///
70    /// # Examples
71    ///
72    /// ```
73    /// # #[cfg(feature = "alloc")] {
74    /// use iri_string::percent_encode::PercentEncoded;
75    /// use iri_string::spec::UriSpec;
76    ///
77    /// let raw = "alpha.\u{03B1}.example.com";
78    /// let encoded = "alpha.%CE%B1.example.com";
79    /// assert_eq!(
80    ///     PercentEncoded::<_, UriSpec>::from_reg_name(raw).to_string(),
81    ///     encoded
82    /// );
83    /// # }
84    /// ```
85    pub fn from_reg_name(raw: T) -> Self {
86        Self {
87            context: Context::RegName,
88            raw,
89            _spec: PhantomData,
90        }
91    }
92
93    /// Creates an encoded string from a raw user name (inside `userinfo` component).
94    ///
95    /// # Examples
96    ///
97    /// ```
98    /// # #[cfg(feature = "alloc")] {
99    /// use iri_string::percent_encode::PercentEncoded;
100    /// use iri_string::spec::UriSpec;
101    ///
102    /// let raw = "user:\u{03B1}";
103    /// // The first `:` will be interpreted as a delimiter, so colons will be escaped.
104    /// let encoded = "user%3A%CE%B1";
105    /// assert_eq!(
106    ///     PercentEncoded::<_, UriSpec>::from_user(raw).to_string(),
107    ///     encoded
108    /// );
109    /// # }
110    /// ```
111    pub fn from_user(raw: T) -> Self {
112        Self {
113            context: Context::UserOrPassword,
114            raw,
115            _spec: PhantomData,
116        }
117    }
118
119    /// Creates an encoded string from a raw user name (inside `userinfo` component).
120    ///
121    /// # Examples
122    ///
123    /// ```
124    /// # #[cfg(feature = "alloc")] {
125    /// use iri_string::percent_encode::PercentEncoded;
126    /// use iri_string::spec::UriSpec;
127    ///
128    /// let raw = "password:\u{03B1}";
129    /// // The first `:` will be interpreted as a delimiter, and the colon
130    /// // inside the password will be the first one if the user name is empty,
131    /// // so colons will be escaped.
132    /// let encoded = "password%3A%CE%B1";
133    /// assert_eq!(
134    ///     PercentEncoded::<_, UriSpec>::from_password(raw).to_string(),
135    ///     encoded
136    /// );
137    /// # }
138    /// ```
139    pub fn from_password(raw: T) -> Self {
140        Self {
141            context: Context::UserOrPassword,
142            raw,
143            _spec: PhantomData,
144        }
145    }
146
147    /// Creates an encoded string from a raw path segment.
148    ///
149    /// # Examples
150    ///
151    /// ```
152    /// # #[cfg(feature = "alloc")] {
153    /// use iri_string::percent_encode::PercentEncoded;
154    /// use iri_string::spec::UriSpec;
155    ///
156    /// let raw = "alpha/\u{03B1}?#";
157    /// // Note that `/` is encoded to `%2F`.
158    /// let encoded = "alpha%2F%CE%B1%3F%23";
159    /// assert_eq!(
160    ///     PercentEncoded::<_, UriSpec>::from_path_segment(raw).to_string(),
161    ///     encoded
162    /// );
163    /// # }
164    /// ```
165    pub fn from_path_segment(raw: T) -> Self {
166        Self {
167            context: Context::PathSegment,
168            raw,
169            _spec: PhantomData,
170        }
171    }
172
173    /// Creates an encoded string from a raw path.
174    ///
175    /// # Examples
176    ///
177    /// ```
178    /// # #[cfg(feature = "alloc")] {
179    /// use iri_string::percent_encode::PercentEncoded;
180    /// use iri_string::spec::UriSpec;
181    ///
182    /// let raw = "alpha/\u{03B1}?#";
183    /// // Note that `/` is NOT percent encoded.
184    /// let encoded = "alpha/%CE%B1%3F%23";
185    /// assert_eq!(
186    ///     PercentEncoded::<_, UriSpec>::from_path(raw).to_string(),
187    ///     encoded
188    /// );
189    /// # }
190    /// ```
191    pub fn from_path(raw: T) -> Self {
192        Self {
193            context: Context::Path,
194            raw,
195            _spec: PhantomData,
196        }
197    }
198
199    /// Creates an encoded string from a raw query.
200    ///
201    /// # Examples
202    ///
203    /// ```
204    /// # #[cfg(feature = "alloc")] {
205    /// use iri_string::percent_encode::PercentEncoded;
206    /// use iri_string::spec::UriSpec;
207    ///
208    /// let raw = "alpha/\u{03B1}?#";
209    /// let encoded = "alpha/%CE%B1?%23";
210    /// assert_eq!(
211    ///     PercentEncoded::<_, UriSpec>::from_query(raw).to_string(),
212    ///     encoded
213    /// );
214    /// # }
215    /// ```
216    pub fn from_query(raw: T) -> Self {
217        Self {
218            context: Context::Query,
219            raw,
220            _spec: PhantomData,
221        }
222    }
223
224    /// Creates an encoded string from a raw fragment.
225    ///
226    /// # Examples
227    ///
228    /// ```
229    /// # #[cfg(feature = "alloc")] {
230    /// use iri_string::percent_encode::PercentEncoded;
231    /// use iri_string::spec::UriSpec;
232    ///
233    /// let raw = "alpha/\u{03B1}?#";
234    /// let encoded = "alpha/%CE%B1?%23";
235    /// assert_eq!(
236    ///     PercentEncoded::<_, UriSpec>::from_fragment(raw).to_string(),
237    ///     encoded
238    /// );
239    /// # }
240    /// ```
241    pub fn from_fragment(raw: T) -> Self {
242        Self {
243            context: Context::Fragment,
244            raw,
245            _spec: PhantomData,
246        }
247    }
248
249    /// Creates a string consists of only `unreserved` string and percent-encoded triplets.
250    ///
251    /// # Examples
252    ///
253    /// ```
254    /// # #[cfg(feature = "alloc")] {
255    /// use iri_string::percent_encode::PercentEncoded;
256    /// use iri_string::spec::UriSpec;
257    ///
258    /// let unreserved = "%a0-._~\u{03B1}";
259    /// let unreserved_encoded = "%25a0-._~%CE%B1";
260    /// assert_eq!(
261    ///     PercentEncoded::<_, UriSpec>::unreserve(unreserved).to_string(),
262    ///     unreserved_encoded
263    /// );
264    ///
265    /// let reserved = ":/?#[]@ !$&'()*+,;=";
266    /// let reserved_encoded =
267    ///     "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D";
268    /// assert_eq!(
269    ///     PercentEncoded::<_, UriSpec>::unreserve(reserved).to_string(),
270    ///     reserved_encoded
271    /// );
272    /// # }
273    /// ```
274    #[inline]
275    #[must_use]
276    pub fn unreserve(raw: T) -> Self {
277        Self {
278            context: Context::Unreserve,
279            raw,
280            _spec: PhantomData,
281        }
282    }
283
284    /// Percent-encodes characters only if they cannot appear anywhere in an IRI reference.
285    ///
286    /// `%` character will be always encoded. In other words, this conversion
287    /// is not aware of percent-encoded triplets.
288    ///
289    /// Note that this encoding process does not guarantee that the resulting
290    /// string is a valid IRI reference.
291    ///
292    /// # Examples
293    ///
294    /// ```
295    /// # #[cfg(feature = "alloc")] {
296    /// use iri_string::percent_encode::PercentEncoded;
297    /// use iri_string::spec::UriSpec;
298    ///
299    /// let unreserved = "%a0-._~\u{03B1}";
300    /// let unreserved_encoded = "%25a0-._~%CE%B1";
301    /// assert_eq!(
302    ///     PercentEncoded::<_, UriSpec>::characters(unreserved).to_string(),
303    ///     unreserved_encoded
304    /// );
305    ///
306    /// let reserved = ":/?#[]@ !$&'()*+,;=";
307    /// // Note that `%20` cannot appear directly in an IRI reference.
308    /// let expected = ":/?#[]@%20!$&'()*+,;=";
309    /// assert_eq!(
310    ///     PercentEncoded::<_, UriSpec>::characters(reserved).to_string(),
311    ///     expected
312    /// );
313    /// # }
314    /// ```
315    #[inline]
316    #[must_use]
317    pub fn characters(raw: T) -> Self {
318        Self {
319            context: Context::Character,
320            raw,
321            _spec: PhantomData,
322        }
323    }
324}
325
326impl<T: fmt::Display, S: Spec> fmt::Display for PercentEncoded<T, S> {
327    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
328        /// Filter that encodes a character before written if necessary.
329        struct Filter<'a, 'b, S> {
330            /// Encoding context.
331            context: Context,
332            /// Writer.
333            writer: &'a mut fmt::Formatter<'b>,
334            /// Spec.
335            _spec: PhantomData<fn() -> S>,
336        }
337        impl<S: Spec> fmt::Write for Filter<'_, '_, S> {
338            fn write_str(&mut self, s: &str) -> fmt::Result {
339                s.chars().try_for_each(|c| self.write_char(c))
340            }
341            fn write_char(&mut self, c: char) -> fmt::Result {
342                let is_valid_char = match (self.context, c.is_ascii()) {
343                    (Context::RegName, true) => char::is_ascii_regname(c as u8),
344                    (Context::RegName, false) => char::is_nonascii_regname::<S>(c),
345                    (Context::UserOrPassword, true) => {
346                        c != ':' && char::is_ascii_userinfo_ipvfutureaddr(c as u8)
347                    }
348                    (Context::UserOrPassword, false) => char::is_nonascii_userinfo::<S>(c),
349                    (Context::PathSegment, true) => char::is_ascii_pchar(c as u8),
350                    (Context::PathSegment, false) => S::is_nonascii_char_unreserved(c),
351                    (Context::Path, true) => c == '/' || char::is_ascii_pchar(c as u8),
352                    (Context::Path, false) => S::is_nonascii_char_unreserved(c),
353                    (Context::Query, true) => c == '/' || char::is_ascii_frag_query(c as u8),
354                    (Context::Query, false) => char::is_nonascii_query::<S>(c),
355                    (Context::Fragment, true) => c == '/' || char::is_ascii_frag_query(c as u8),
356                    (Context::Fragment, false) => char::is_nonascii_fragment::<S>(c),
357                    (Context::Unreserve, true) => char::is_ascii_unreserved(c as u8),
358                    (Context::Unreserve, false) => S::is_nonascii_char_unreserved(c),
359                    (Context::Character, true) => char::is_ascii_unreserved_or_reserved(c as u8),
360                    (Context::Character, false) => {
361                        S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c)
362                    }
363                };
364                if is_valid_char {
365                    self.writer.write_char(c)
366                } else {
367                    write_pct_encoded_char(&mut self.writer, c)
368                }
369            }
370        }
371        let mut filter = Filter {
372            context: self.context,
373            writer: f,
374            _spec: PhantomData::<fn() -> S>,
375        };
376        write!(filter, "{}", self.raw)
377    }
378}
379
/// Writes `c` to `writer` as percent-encoded triplets.
///
/// Each byte of the UTF-8 encoding of `c` is written as `%XX`, where `XX` is
/// the two-digit uppercase hexadecimal value of the byte. The first error
/// reported by `writer` is returned immediately.
#[inline]
fn write_pct_encoded_char<W: fmt::Write>(writer: &mut W, c: char) -> fmt::Result {
    // Four bytes is the buffer size used for the UTF-8 form of a single `char`.
    let mut utf8 = [0_u8; 4];
    for &octet in c.encode_utf8(&mut utf8).as_bytes() {
        write!(writer, "%{:02X}", octet)?;
    }
    Ok(())
}