iri_string/percent_encode.rs
1//! Percent encoding.
2//!
//! Functions in this module are intended for manual manipulation of URI components.
//! If you need to convert a Unicode IRI into an ASCII-only URI, check the `encode_to_uri`
//! methods of IRI string types (such as [`IriStr::encode_to_uri`]).
6//!
7//! [`IriStr::encode_to_uri`]: `crate::types::IriStr::encode_to_uri`
8
9pub mod decode;
10
11use core::fmt::{self, Write as _};
12use core::marker::PhantomData;
13
14use crate::parser::char;
15use crate::spec::{IriSpec, Spec, UriSpec};
16
17/// A proxy to percent-encode a string as a part of URI.
18pub type PercentEncodedForUri<T> = PercentEncoded<T, UriSpec>;
19
20/// A proxy to percent-encode a string as a part of IRI.
21pub type PercentEncodedForIri<T> = PercentEncoded<T, IriSpec>;
22
/// The kind of component a raw string is being percent-encoded for.
///
/// The context selects which characters are written as-is and which are
/// replaced by percent-encoded triplets.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
enum Context {
    /// The string is a reg-name (commonly known as a "hostname").
    RegName,
    /// The string is a user name or a password, i.e. a part of the `userinfo` component.
    UserOrPassword,
    /// The string is a single path segment.
    ///
    /// A slash (`/`) is encoded to `%2F`.
    PathSegment,
    /// The string is a whole path, i.e. path segments joined with `/`.
    ///
    /// A slash (`/`) is written as is.
    Path,
    /// The string is a query (without the leading `?`).
    Query,
    /// The string is a fragment (without the leading `#`).
    Fragment,
    /// Every character other than `unreserved` characters is encoded.
    Unreserve,
    /// Only characters that cannot appear anywhere in an IRI reference are encoded.
    ///
    /// The `%` character is always encoded.
    Character,
}
50
51/// A proxy to percent-encode a string.
52///
53/// Type aliases [`PercentEncodedForIri`] and [`PercentEncodedForUri`] are provided.
54/// You can use them to make the expression simpler, for example write
55/// `PercentEncodedForUri::from_path(foo)` instead of
56/// `PercentEncoded::<_, UriSpec>::from_path(foo)`.
57#[derive(Debug, Clone, Copy)]
58pub struct PercentEncoded<T, S> {
59 /// Source string context.
60 context: Context,
61 /// Raw string before being encoded.
62 raw: T,
63 /// Spec.
64 _spec: PhantomData<fn() -> S>,
65}
66
67impl<T: fmt::Display, S: Spec> PercentEncoded<T, S> {
68 /// Creates an encoded string from a raw reg-name (i.e. hostname or domain).
69 ///
70 /// # Examples
71 ///
72 /// ```
73 /// # #[cfg(feature = "alloc")] {
74 /// use iri_string::percent_encode::PercentEncoded;
75 /// use iri_string::spec::UriSpec;
76 ///
77 /// let raw = "alpha.\u{03B1}.example.com";
78 /// let encoded = "alpha.%CE%B1.example.com";
79 /// assert_eq!(
80 /// PercentEncoded::<_, UriSpec>::from_reg_name(raw).to_string(),
81 /// encoded
82 /// );
83 /// # }
84 /// ```
85 pub fn from_reg_name(raw: T) -> Self {
86 Self {
87 context: Context::RegName,
88 raw,
89 _spec: PhantomData,
90 }
91 }
92
93 /// Creates an encoded string from a raw user name (inside `userinfo` component).
94 ///
95 /// # Examples
96 ///
97 /// ```
98 /// # #[cfg(feature = "alloc")] {
99 /// use iri_string::percent_encode::PercentEncoded;
100 /// use iri_string::spec::UriSpec;
101 ///
102 /// let raw = "user:\u{03B1}";
103 /// // The first `:` will be interpreted as a delimiter, so colons will be escaped.
104 /// let encoded = "user%3A%CE%B1";
105 /// assert_eq!(
106 /// PercentEncoded::<_, UriSpec>::from_user(raw).to_string(),
107 /// encoded
108 /// );
109 /// # }
110 /// ```
111 pub fn from_user(raw: T) -> Self {
112 Self {
113 context: Context::UserOrPassword,
114 raw,
115 _spec: PhantomData,
116 }
117 }
118
119 /// Creates an encoded string from a raw user name (inside `userinfo` component).
120 ///
121 /// # Examples
122 ///
123 /// ```
124 /// # #[cfg(feature = "alloc")] {
125 /// use iri_string::percent_encode::PercentEncoded;
126 /// use iri_string::spec::UriSpec;
127 ///
128 /// let raw = "password:\u{03B1}";
129 /// // The first `:` will be interpreted as a delimiter, and the colon
130 /// // inside the password will be the first one if the user name is empty,
131 /// // so colons will be escaped.
132 /// let encoded = "password%3A%CE%B1";
133 /// assert_eq!(
134 /// PercentEncoded::<_, UriSpec>::from_password(raw).to_string(),
135 /// encoded
136 /// );
137 /// # }
138 /// ```
139 pub fn from_password(raw: T) -> Self {
140 Self {
141 context: Context::UserOrPassword,
142 raw,
143 _spec: PhantomData,
144 }
145 }
146
147 /// Creates an encoded string from a raw path segment.
148 ///
149 /// # Examples
150 ///
151 /// ```
152 /// # #[cfg(feature = "alloc")] {
153 /// use iri_string::percent_encode::PercentEncoded;
154 /// use iri_string::spec::UriSpec;
155 ///
156 /// let raw = "alpha/\u{03B1}?#";
157 /// // Note that `/` is encoded to `%2F`.
158 /// let encoded = "alpha%2F%CE%B1%3F%23";
159 /// assert_eq!(
160 /// PercentEncoded::<_, UriSpec>::from_path_segment(raw).to_string(),
161 /// encoded
162 /// );
163 /// # }
164 /// ```
165 pub fn from_path_segment(raw: T) -> Self {
166 Self {
167 context: Context::PathSegment,
168 raw,
169 _spec: PhantomData,
170 }
171 }
172
173 /// Creates an encoded string from a raw path.
174 ///
175 /// # Examples
176 ///
177 /// ```
178 /// # #[cfg(feature = "alloc")] {
179 /// use iri_string::percent_encode::PercentEncoded;
180 /// use iri_string::spec::UriSpec;
181 ///
182 /// let raw = "alpha/\u{03B1}?#";
183 /// // Note that `/` is NOT percent encoded.
184 /// let encoded = "alpha/%CE%B1%3F%23";
185 /// assert_eq!(
186 /// PercentEncoded::<_, UriSpec>::from_path(raw).to_string(),
187 /// encoded
188 /// );
189 /// # }
190 /// ```
191 pub fn from_path(raw: T) -> Self {
192 Self {
193 context: Context::Path,
194 raw,
195 _spec: PhantomData,
196 }
197 }
198
199 /// Creates an encoded string from a raw query.
200 ///
201 /// # Examples
202 ///
203 /// ```
204 /// # #[cfg(feature = "alloc")] {
205 /// use iri_string::percent_encode::PercentEncoded;
206 /// use iri_string::spec::UriSpec;
207 ///
208 /// let raw = "alpha/\u{03B1}?#";
209 /// let encoded = "alpha/%CE%B1?%23";
210 /// assert_eq!(
211 /// PercentEncoded::<_, UriSpec>::from_query(raw).to_string(),
212 /// encoded
213 /// );
214 /// # }
215 /// ```
216 pub fn from_query(raw: T) -> Self {
217 Self {
218 context: Context::Query,
219 raw,
220 _spec: PhantomData,
221 }
222 }
223
224 /// Creates an encoded string from a raw fragment.
225 ///
226 /// # Examples
227 ///
228 /// ```
229 /// # #[cfg(feature = "alloc")] {
230 /// use iri_string::percent_encode::PercentEncoded;
231 /// use iri_string::spec::UriSpec;
232 ///
233 /// let raw = "alpha/\u{03B1}?#";
234 /// let encoded = "alpha/%CE%B1?%23";
235 /// assert_eq!(
236 /// PercentEncoded::<_, UriSpec>::from_fragment(raw).to_string(),
237 /// encoded
238 /// );
239 /// # }
240 /// ```
241 pub fn from_fragment(raw: T) -> Self {
242 Self {
243 context: Context::Fragment,
244 raw,
245 _spec: PhantomData,
246 }
247 }
248
249 /// Creates a string consists of only `unreserved` string and percent-encoded triplets.
250 ///
251 /// # Examples
252 ///
253 /// ```
254 /// # #[cfg(feature = "alloc")] {
255 /// use iri_string::percent_encode::PercentEncoded;
256 /// use iri_string::spec::UriSpec;
257 ///
258 /// let unreserved = "%a0-._~\u{03B1}";
259 /// let unreserved_encoded = "%25a0-._~%CE%B1";
260 /// assert_eq!(
261 /// PercentEncoded::<_, UriSpec>::unreserve(unreserved).to_string(),
262 /// unreserved_encoded
263 /// );
264 ///
265 /// let reserved = ":/?#[]@ !$&'()*+,;=";
266 /// let reserved_encoded =
267 /// "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D";
268 /// assert_eq!(
269 /// PercentEncoded::<_, UriSpec>::unreserve(reserved).to_string(),
270 /// reserved_encoded
271 /// );
272 /// # }
273 /// ```
274 #[inline]
275 #[must_use]
276 pub fn unreserve(raw: T) -> Self {
277 Self {
278 context: Context::Unreserve,
279 raw,
280 _spec: PhantomData,
281 }
282 }
283
284 /// Percent-encodes characters only if they cannot appear anywhere in an IRI reference.
285 ///
286 /// `%` character will be always encoded. In other words, this conversion
287 /// is not aware of percent-encoded triplets.
288 ///
289 /// Note that this encoding process does not guarantee that the resulting
290 /// string is a valid IRI reference.
291 ///
292 /// # Examples
293 ///
294 /// ```
295 /// # #[cfg(feature = "alloc")] {
296 /// use iri_string::percent_encode::PercentEncoded;
297 /// use iri_string::spec::UriSpec;
298 ///
299 /// let unreserved = "%a0-._~\u{03B1}";
300 /// let unreserved_encoded = "%25a0-._~%CE%B1";
301 /// assert_eq!(
302 /// PercentEncoded::<_, UriSpec>::characters(unreserved).to_string(),
303 /// unreserved_encoded
304 /// );
305 ///
306 /// let reserved = ":/?#[]@ !$&'()*+,;=";
307 /// // Note that `%20` cannot appear directly in an IRI reference.
308 /// let expected = ":/?#[]@%20!$&'()*+,;=";
309 /// assert_eq!(
310 /// PercentEncoded::<_, UriSpec>::characters(reserved).to_string(),
311 /// expected
312 /// );
313 /// # }
314 /// ```
315 #[inline]
316 #[must_use]
317 pub fn characters(raw: T) -> Self {
318 Self {
319 context: Context::Character,
320 raw,
321 _spec: PhantomData,
322 }
323 }
324}
325
326impl<T: fmt::Display, S: Spec> fmt::Display for PercentEncoded<T, S> {
327 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
328 /// Filter that encodes a character before written if necessary.
329 struct Filter<'a, 'b, S> {
330 /// Encoding context.
331 context: Context,
332 /// Writer.
333 writer: &'a mut fmt::Formatter<'b>,
334 /// Spec.
335 _spec: PhantomData<fn() -> S>,
336 }
337 impl<S: Spec> fmt::Write for Filter<'_, '_, S> {
338 fn write_str(&mut self, s: &str) -> fmt::Result {
339 s.chars().try_for_each(|c| self.write_char(c))
340 }
341 fn write_char(&mut self, c: char) -> fmt::Result {
342 let is_valid_char = match (self.context, c.is_ascii()) {
343 (Context::RegName, true) => char::is_ascii_regname(c as u8),
344 (Context::RegName, false) => char::is_nonascii_regname::<S>(c),
345 (Context::UserOrPassword, true) => {
346 c != ':' && char::is_ascii_userinfo_ipvfutureaddr(c as u8)
347 }
348 (Context::UserOrPassword, false) => char::is_nonascii_userinfo::<S>(c),
349 (Context::PathSegment, true) => char::is_ascii_pchar(c as u8),
350 (Context::PathSegment, false) => S::is_nonascii_char_unreserved(c),
351 (Context::Path, true) => c == '/' || char::is_ascii_pchar(c as u8),
352 (Context::Path, false) => S::is_nonascii_char_unreserved(c),
353 (Context::Query, true) => c == '/' || char::is_ascii_frag_query(c as u8),
354 (Context::Query, false) => char::is_nonascii_query::<S>(c),
355 (Context::Fragment, true) => c == '/' || char::is_ascii_frag_query(c as u8),
356 (Context::Fragment, false) => char::is_nonascii_fragment::<S>(c),
357 (Context::Unreserve, true) => char::is_ascii_unreserved(c as u8),
358 (Context::Unreserve, false) => S::is_nonascii_char_unreserved(c),
359 (Context::Character, true) => char::is_ascii_unreserved_or_reserved(c as u8),
360 (Context::Character, false) => {
361 S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c)
362 }
363 };
364 if is_valid_char {
365 self.writer.write_char(c)
366 } else {
367 write_pct_encoded_char(&mut self.writer, c)
368 }
369 }
370 }
371 let mut filter = Filter {
372 context: self.context,
373 writer: f,
374 _spec: PhantomData::<fn() -> S>,
375 };
376 write!(filter, "{}", self.raw)
377 }
378}
379
/// Writes the percent-encoded form of `c` to `writer`.
///
/// The character is encoded as UTF-8, and every resulting byte is written as
/// a `%` followed by two uppercase hexadecimal digits.
///
/// Returns the first error reported by `writer`, if any.
#[inline]
fn write_pct_encoded_char<W: fmt::Write>(writer: &mut W, c: char) -> fmt::Result {
    /// Uppercase hexadecimal digits, indexed by nibble value.
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let mut utf8 = [0_u8; 4];
    for &byte in c.encode_utf8(&mut utf8).as_bytes() {
        let high = HEX_UPPER[usize::from(byte >> 4)];
        let low = HEX_UPPER[usize::from(byte & 0x0F)];
        writer.write_char('%')?;
        // The table holds only ASCII digits, so the casts are lossless.
        writer.write_char(high as char)?;
        writer.write_char(low as char)?;
    }
    Ok(())
}