url_escape/encode/
mod.rs

1// Ref: https://url.spec.whatwg.org/
2
3use std::borrow::Cow;
4use std::io::{self, Write};
5use std::str::from_utf8_unchecked;
6
7use crate::percent_encoding::{utf8_percent_encode, AsciiSet};
8
9/// The C0 control percent-encode set are the C0 controls and U+007F (DEL).
10pub use percent_encoding::CONTROLS;
11
12/// Not an ASCII letter or digit.
13pub use percent_encoding::NON_ALPHANUMERIC;
14
15/// The fragment percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`).
16pub const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
17
18/// The query percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#), U+003C (<), and U+003E (>).
19///
20/// The query percent-encode set cannot be defined in terms of the fragment percent-encode set due to the omission of U+0060 (`).
21pub const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
22
23/// The special-query percent-encode set is the query percent-encode set and U+0027 (').
24pub const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');
25
26/// The path percent-encode set is the query percent-encode set and U+003F (?), U+0060 (`), U+007B ({), and U+007D (}).
27pub const PATH: &AsciiSet = &QUERY.add(b'?').add(b'`').add(b'{').add(b'}');
28
29/// The userinfo percent-encode set is the path percent-encode set and U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+0040 (@), U+005B ([) to U+005E (^), inclusive, and U+007C (|).
30pub const USERINFO: &AsciiSet = &PATH
31    .add(b'/')
32    .add(b':')
33    .add(b';')
34    .add(b'=')
35    .add(b'@')
36    .add(b'[')
37    .add(b'\\')
38    .add(b']')
39    .add(b'^')
40    .add(b'|');
41
42/// The component percent-encode set is the userinfo percent-encode set and U+0024 ($) to U+0026 (&), inclusive, U+002B (+), and U+002C (,).
43pub const COMPONENT: &AsciiSet = &USERINFO.add(b'$').add(b'%').add(b'&').add(b'+').add(b',');
44
45/// The application/x-www-form-urlencoded percent-encode set is the component percent-encode set and U+0021 (!), U+0027 (') to U+0029 RIGHT PARENTHESIS, inclusive, and U+007E (~).
46pub const X_WWW_FORM_URLENCODED: &AsciiSet =
47    &COMPONENT.add(b'!').add(b'\'').add(b'(').add(b')').add(b'~');
48
49/// Encode text.
50#[inline]
51pub fn encode<'a, S: ?Sized + AsRef<str>>(
52    text: &'a S,
53    ascii_set: &'static AsciiSet,
54) -> Cow<'a, str> {
55    Cow::from(utf8_percent_encode(text.as_ref(), ascii_set))
56}
57
58/// Write text to a mutable `String` reference and return the encoded string slice.
59#[inline]
60pub fn encode_to_string<'a, S: AsRef<str>>(
61    text: S,
62    ascii_set: &'static AsciiSet,
63    output: &'a mut String,
64) -> &'a str {
65    unsafe { from_utf8_unchecked(encode_to_vec(text, ascii_set, output.as_mut_vec())) }
66}
67
68/// Write text to a mutable `Vec<u8>` reference and return the encoded data slice.
69pub fn encode_to_vec<'a, S: AsRef<str>>(
70    text: S,
71    ascii_set: &'static AsciiSet,
72    output: &'a mut Vec<u8>,
73) -> &'a [u8] {
74    let text = text.as_ref();
75    let text_bytes = text.as_bytes();
76    let text_length = text_bytes.len();
77
78    output.reserve(text_length);
79
80    let current_length = output.len();
81
82    let pe = utf8_percent_encode(text, ascii_set);
83
84    output.extend(pe.flat_map(|e| e.bytes()));
85
86    &output[current_length..]
87}
88
89/// Write text to a writer.
90#[inline]
91pub fn encode_to_writer<S: AsRef<str>, W: Write>(
92    text: S,
93    ascii_set: &'static AsciiSet,
94    output: &mut W,
95) -> Result<(), io::Error> {
96    let pe = utf8_percent_encode(text.as_ref(), ascii_set);
97
98    for s in pe {
99        output.write_all(s.as_bytes())?;
100    }
101
102    Ok(())
103}
104
// Generates the four public encoders for one percent-encode set:
// a `Cow` flavour, a `String` flavour, a `Vec<u8>` flavour and a writer flavour.
//
// Input, separated by `;`:
//   1. docs describing the set, followed by the set constant,
//   2. docs + name of the `Cow` encoder,
//   3. docs + name of the `String` encoder,
//   4. docs + name of the `Vec<u8>` encoder,
//   5. docs + name of the writer encoder (any trailing `;` is optional).
//
// Each generated function is documented with its own docs, an empty doc line, and then
// the docs describing the set.
macro_rules! encode_impl {
    (
        $(#[$set_doc:meta])*
        $set:ident;
        $(#[$cow_doc:meta])*
        $cow_fn:ident;
        $(#[$string_doc:meta])*
        $string_fn:ident;
        $(#[$vec_doc:meta])*
        $vec_fn:ident;
        $(#[$writer_doc:meta])*
        $writer_fn:ident
        $(;)*
    ) => {
        $(#[$cow_doc])*
        ///
        $(#[$set_doc])*
        #[inline]
        pub fn $cow_fn<S: ?Sized + AsRef<str>>(text: &S) -> Cow<str> {
            encode(text, $set)
        }

        $(#[$string_doc])*
        ///
        $(#[$set_doc])*
        #[inline]
        pub fn $string_fn<S: AsRef<str>>(text: S, output: &mut String) -> &str {
            encode_to_string(text, $set, output)
        }

        $(#[$vec_doc])*
        ///
        $(#[$set_doc])*
        #[inline]
        pub fn $vec_fn<S: AsRef<str>>(text: S, output: &mut Vec<u8>) -> &[u8] {
            encode_to_vec(text, $set, output)
        }

        $(#[$writer_doc])*
        ///
        $(#[$set_doc])*
        #[inline]
        pub fn $writer_fn<S: AsRef<str>, W: Write>(text: S, output: &mut W) -> Result<(), io::Error> {
            encode_to_writer(text, $set, output)
        }
    };
}
140
141encode_impl! {
142    /// The following characters are escaped:
143    ///
144    /// C0 controls and,
145    ///
146    /// * SPACE
147    /// * `"`
148    /// * `<`
149    /// * `>`
150    /// * <code>&#096;</code>
151    ///
152    /// and all code points greater than `~` (U+007E) are escaped.
153    FRAGMENT;
154    /// Encode text used in a fragment part.
155    encode_fragment;
156    /// Write text used in a fragment part to a mutable `String` reference and return the encoded string slice.
157    encode_fragment_to_string;
158    /// Write text used in a fragment part to a mutable `Vec<u8>` reference and return the encoded data slice.
159    encode_fragment_to_vec;
160    /// Write text used in a fragment part to a writer.
161    encode_fragment_to_writer;
162}
163
164encode_impl! {
165    /// The following characters are escaped:
166    ///
167    /// C0 controls and,
168    ///
169    /// * SPACE
170    /// * `"`
171    /// * `#`
172    /// * `<`
173    /// * `>`
174    ///
175    /// and all code points greater than `~` (U+007E) are escaped.
176    QUERY;
177    /// Encode text used in the query part.
178    encode_query;
179    /// Write text used in the query part to a mutable `String` reference and return the encoded string slice.
180    encode_query_to_string;
181    /// Write text used in the query part to a mutable `Vec<u8>` reference and return the encoded data slice.
182    encode_query_to_vec;
183    /// Write text used in the query part to a writer.
184    encode_query_to_writer;
185}
186
187encode_impl! {
188    /// The following characters are escaped:
189    ///
190    /// C0 controls and,
191    ///
192    /// * SPACE
193    /// * `"`
194    /// * `#`
195    /// * `'`
196    /// * `<`
197    /// * `>`
198    ///
199    /// and all code points greater than `~` (U+007E) are escaped.
200    ///
201    /// The term "special" means whether a URL is special. A URL is special is the scheme of that URL is **ftp**, **file** , **http**, **https**, **ws**, or **wss**.
202    SPECIAL_QUERY;
203    /// Encode text used in the query part.
204    encode_special_query;
205    /// Write text used in the query part to a mutable `String` reference and return the encoded string slice.
206    encode_special_query_to_string;
207    /// Write text used in the query part to a mutable `Vec<u8>` reference and return the encoded data slice.
208    encode_special_query_to_vec;
209    /// Write text used in the query part to a writer.
210    encode_special_query_to_writer;
211}
212
213encode_impl! {
214    /// The following characters are escaped:
215    ///
216    /// C0 controls and,
217    ///
218    /// * SPACE
219    /// * `"`
220    /// * `#`
221    /// * `<`
222    /// * `>`
223    /// * `?`
224    /// * <code>&#096;</code>
225    /// * `{`
226    /// * `}`
227    ///
228    /// and all code points greater than `~` (U+007E) are escaped.
229    PATH;
230    /// Encode text used in the path part.
231    encode_path;
232    /// Write text used in the path part to a mutable `String` reference and return the encoded string slice.
233    encode_path_to_string;
234    /// Write text used in the path part to a mutable `Vec<u8>` reference and return the encoded data slice.
235    encode_path_to_vec;
236    /// Write text used in the path part to a writer.
237    encode_path_to_writer;
238}
239
240encode_impl! {
241    /// The following characters are escaped:
242    ///
243    /// C0 controls and,
244    ///
245    /// * SPACE
246    /// * `"`
247    /// * `#`
248    /// * `/`
249    /// * `:`
250    /// * `;`
251    /// * `<`
252    /// * `=`
253    /// * `>`
254    /// * `?`
255    /// * `@`
256    /// * `[`
257    /// * `\`
258    /// * `]`
259    /// * `^`
260    /// * <code>&#096;</code>
261    /// * `{`
262    /// * `}`
263    /// * `|`
264    ///
265    /// and all code points greater than `~` (U+007E) are escaped.
266    USERINFO;
267    /// Encode text used in the userinfo part.
268    encode_userinfo;
269    /// Write text used in the userinfo part to a mutable `String` reference and return the encoded string slice.
270    encode_userinfo_to_string;
271    /// Write text used in the userinfo part to a mutable `Vec<u8>` reference and return the encoded data slice.
272    encode_userinfo_to_vec;
273    /// Write text used in the userinfo part to a writer.
274    encode_userinfo_to_writer;
275}
276
277encode_impl! {
278    /// The following characters are escaped:
279    ///
280    /// C0 controls and,
281    ///
282    /// * SPACE
283    /// * `"`
284    /// * `#`
285    /// * `$`
286    /// * `%`
287    /// * `&`
288    /// * `+`
289    /// * `,`
290    /// * `/`
291    /// * `:`
292    /// * `;`
293    /// * `<`
294    /// * `=`
295    /// * `>`
296    /// * `?`
297    /// * `@`
298    /// * `[`
299    /// * `\`
300    /// * `]`
301    /// * `^`
302    /// * <code>&#096;</code>
303    /// * `{`
304    /// * `}`
305    /// * `|`
306    ///
307    /// and all code points greater than `~` (U+007E) are escaped.
308    ///
309    /// It gives identical results to JavaScript's `encodeURIComponent()`.
310    COMPONENT;
311    /// Encode text used in a component.
312    encode_component;
313    /// Write text used in a component to a mutable `String` reference and return the encoded string slice.
314    encode_component_to_string;
315    /// Write text used in a component to a mutable `Vec<u8>` reference and return the encoded data slice.
316    encode_component_to_vec;
317    /// Write text used in a component to a writer.
318    encode_component_to_writer;
319}
320
321encode_impl! {
322    /// The following characters are escaped:
323    ///
324    /// C0 controls and,
325    ///
326    /// * SPACE
327    /// * `!`
328    /// * `"`
329    /// * `#`
330    /// * `$`
331    /// * `%`
332    /// * `&`
333    /// * `'`
334    /// * `(`
335    /// * `)`
336    /// * `+`
337    /// * `,`
338    /// * `/`
339    /// * `:`
340    /// * `;`
341    /// * `<`
342    /// * `=`
343    /// * `>`
344    /// * `?`
345    /// * `@`
346    /// * `[`
347    /// * `\`
348    /// * `]`
349    /// * `^`
350    /// * <code>&#096;</code>
351    /// * `{`
352    /// * `}`
353    /// * `|`
354    /// * `~`
355    ///
356    /// and all code points greater than `~` (U+007E) are escaped.
357    X_WWW_FORM_URLENCODED;
358    /// Encode text as a www-form-urlencoded text.
359    encode_www_form_urlencoded;
360    /// Write text as a urlencoded text to a mutable `String` reference and return the encoded string slice.
361    encode_www_form_urlencoded_to_string;
362    /// Write text as a www-form-urlencoded text to a mutable `Vec<u8>` reference and return the encoded data slice.
363    encode_www_form_urlencoded_to_vec;
364    /// Write text as a www-form-urlencoded text to a writer.
365    encode_www_form_urlencoded_to_writer;
366}