url_escape/encode/mod.rs
1// Ref: https://url.spec.whatwg.org/
2
3use std::borrow::Cow;
4use std::io::{self, Write};
5use std::str::from_utf8_unchecked;
6
7use crate::percent_encoding::{utf8_percent_encode, AsciiSet};
8
9/// The C0 control percent-encode set are the C0 controls and U+007F (DEL).
10pub use percent_encoding::CONTROLS;
11
12/// Not an ASCII letter or digit.
13pub use percent_encoding::NON_ALPHANUMERIC;
14
15/// The fragment percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`).
16pub const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
17
18/// The query percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#), U+003C (<), and U+003E (>).
19///
20/// The query percent-encode set cannot be defined in terms of the fragment percent-encode set due to the omission of U+0060 (`).
21pub const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
22
23/// The special-query percent-encode set is the query percent-encode set and U+0027 (').
24pub const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');
25
26/// The path percent-encode set is the query percent-encode set and U+003F (?), U+0060 (`), U+007B ({), and U+007D (}).
27pub const PATH: &AsciiSet = &QUERY.add(b'?').add(b'`').add(b'{').add(b'}');
28
29/// The userinfo percent-encode set is the path percent-encode set and U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+0040 (@), U+005B ([) to U+005E (^), inclusive, and U+007C (|).
30pub const USERINFO: &AsciiSet = &PATH
31 .add(b'/')
32 .add(b':')
33 .add(b';')
34 .add(b'=')
35 .add(b'@')
36 .add(b'[')
37 .add(b'\\')
38 .add(b']')
39 .add(b'^')
40 .add(b'|');
41
42/// The component percent-encode set is the userinfo percent-encode set and U+0024 ($) to U+0026 (&), inclusive, U+002B (+), and U+002C (,).
43pub const COMPONENT: &AsciiSet = &USERINFO.add(b'$').add(b'%').add(b'&').add(b'+').add(b',');
44
45/// The application/x-www-form-urlencoded percent-encode set is the component percent-encode set and U+0021 (!), U+0027 (') to U+0029 RIGHT PARENTHESIS, inclusive, and U+007E (~).
46pub const X_WWW_FORM_URLENCODED: &AsciiSet =
47 &COMPONENT.add(b'!').add(b'\'').add(b'(').add(b')').add(b'~');
48
49/// Encode text.
50#[inline]
51pub fn encode<'a, S: ?Sized + AsRef<str>>(
52 text: &'a S,
53 ascii_set: &'static AsciiSet,
54) -> Cow<'a, str> {
55 Cow::from(utf8_percent_encode(text.as_ref(), ascii_set))
56}
57
58/// Write text to a mutable `String` reference and return the encoded string slice.
59#[inline]
60pub fn encode_to_string<'a, S: AsRef<str>>(
61 text: S,
62 ascii_set: &'static AsciiSet,
63 output: &'a mut String,
64) -> &'a str {
65 unsafe { from_utf8_unchecked(encode_to_vec(text, ascii_set, output.as_mut_vec())) }
66}
67
68/// Write text to a mutable `Vec<u8>` reference and return the encoded data slice.
69pub fn encode_to_vec<'a, S: AsRef<str>>(
70 text: S,
71 ascii_set: &'static AsciiSet,
72 output: &'a mut Vec<u8>,
73) -> &'a [u8] {
74 let text = text.as_ref();
75 let text_bytes = text.as_bytes();
76 let text_length = text_bytes.len();
77
78 output.reserve(text_length);
79
80 let current_length = output.len();
81
82 let pe = utf8_percent_encode(text, ascii_set);
83
84 output.extend(pe.flat_map(|e| e.bytes()));
85
86 &output[current_length..]
87}
88
89/// Write text to a writer.
90#[inline]
91pub fn encode_to_writer<S: AsRef<str>, W: Write>(
92 text: S,
93 ascii_set: &'static AsciiSet,
94 output: &mut W,
95) -> Result<(), io::Error> {
96 let pe = utf8_percent_encode(text.as_ref(), ascii_set);
97
98 for s in pe {
99 output.write_all(s.as_bytes())?;
100 }
101
102 Ok(())
103}
104
// Generates the four public encoding functions for one percent-encode set.
//
// An invocation supplies five `;`-separated entries, each optionally preceded by attributes
// (in practice doc comments):
//
// 1. the identifier of the percent-encode set, documented with the list of escaped characters;
// 2. the name of the function returning a `Cow` (wraps `encode`);
// 3. the name of the function appending to a `String` (wraps `encode_to_string`);
// 4. the name of the function appending to a `Vec<u8>` (wraps `encode_to_vec`);
// 5. the name of the function writing to a writer (wraps `encode_to_writer`).
//
// Every generated function is documented with its own attributes, an empty doc line, and then
// the attributes given for the set.
macro_rules! encode_impl {
    (
        $(#[$set_doc:meta])*
        $set:ident;
        $(#[$cow_doc:meta])*
        $cow_fn:ident;
        $(#[$string_doc:meta])*
        $string_fn:ident;
        $(#[$vec_doc:meta])*
        $vec_fn:ident;
        $(#[$writer_doc:meta])*
        $writer_fn:ident $(;)*
    ) => {
        $(#[$cow_doc])*
        ///
        $(#[$set_doc])*
        #[inline]
        pub fn $cow_fn<S: ?Sized + AsRef<str>>(text: &S) -> Cow<'_, str> {
            encode(text, $set)
        }

        $(#[$string_doc])*
        ///
        $(#[$set_doc])*
        #[inline]
        pub fn $string_fn<S: AsRef<str>>(text: S, output: &mut String) -> &str {
            encode_to_string(text, $set, output)
        }

        $(#[$vec_doc])*
        ///
        $(#[$set_doc])*
        #[inline]
        pub fn $vec_fn<S: AsRef<str>>(text: S, output: &mut Vec<u8>) -> &[u8] {
            encode_to_vec(text, $set, output)
        }

        $(#[$writer_doc])*
        ///
        $(#[$set_doc])*
        #[inline]
        pub fn $writer_fn<S: AsRef<str>, W: Write>(text: S, output: &mut W) -> io::Result<()> {
            encode_to_writer(text, $set, output)
        }
    };
}
140
141encode_impl! {
142 /// The following characters are escaped:
143 ///
144 /// C0 controls and,
145 ///
146 /// * SPACE
147 /// * `"`
148 /// * `<`
149 /// * `>`
150 /// * <code>`</code>
151 ///
152 /// and all code points greater than `~` (U+007E) are escaped.
153 FRAGMENT;
154 /// Encode text used in a fragment part.
155 encode_fragment;
156 /// Write text used in a fragment part to a mutable `String` reference and return the encoded string slice.
157 encode_fragment_to_string;
158 /// Write text used in a fragment part to a mutable `Vec<u8>` reference and return the encoded data slice.
159 encode_fragment_to_vec;
160 /// Write text used in a fragment part to a writer.
161 encode_fragment_to_writer;
162}
163
164encode_impl! {
165 /// The following characters are escaped:
166 ///
167 /// C0 controls and,
168 ///
169 /// * SPACE
170 /// * `"`
171 /// * `#`
172 /// * `<`
173 /// * `>`
174 ///
175 /// and all code points greater than `~` (U+007E) are escaped.
176 QUERY;
177 /// Encode text used in the query part.
178 encode_query;
179 /// Write text used in the query part to a mutable `String` reference and return the encoded string slice.
180 encode_query_to_string;
181 /// Write text used in the query part to a mutable `Vec<u8>` reference and return the encoded data slice.
182 encode_query_to_vec;
183 /// Write text used in the query part to a writer.
184 encode_query_to_writer;
185}
186
187encode_impl! {
188 /// The following characters are escaped:
189 ///
190 /// C0 controls and,
191 ///
192 /// * SPACE
193 /// * `"`
194 /// * `#`
195 /// * `'`
196 /// * `<`
197 /// * `>`
198 ///
199 /// and all code points greater than `~` (U+007E) are escaped.
200 ///
201 /// The term "special" means whether a URL is special. A URL is special is the scheme of that URL is **ftp**, **file** , **http**, **https**, **ws**, or **wss**.
202 SPECIAL_QUERY;
203 /// Encode text used in the query part.
204 encode_special_query;
205 /// Write text used in the query part to a mutable `String` reference and return the encoded string slice.
206 encode_special_query_to_string;
207 /// Write text used in the query part to a mutable `Vec<u8>` reference and return the encoded data slice.
208 encode_special_query_to_vec;
209 /// Write text used in the query part to a writer.
210 encode_special_query_to_writer;
211}
212
213encode_impl! {
214 /// The following characters are escaped:
215 ///
216 /// C0 controls and,
217 ///
218 /// * SPACE
219 /// * `"`
220 /// * `#`
221 /// * `<`
222 /// * `>`
223 /// * `?`
224 /// * <code>`</code>
225 /// * `{`
226 /// * `}`
227 ///
228 /// and all code points greater than `~` (U+007E) are escaped.
229 PATH;
230 /// Encode text used in the path part.
231 encode_path;
232 /// Write text used in the path part to a mutable `String` reference and return the encoded string slice.
233 encode_path_to_string;
234 /// Write text used in the path part to a mutable `Vec<u8>` reference and return the encoded data slice.
235 encode_path_to_vec;
236 /// Write text used in the path part to a writer.
237 encode_path_to_writer;
238}
239
240encode_impl! {
241 /// The following characters are escaped:
242 ///
243 /// C0 controls and,
244 ///
245 /// * SPACE
246 /// * `"`
247 /// * `#`
248 /// * `/`
249 /// * `:`
250 /// * `;`
251 /// * `<`
252 /// * `=`
253 /// * `>`
254 /// * `?`
255 /// * `@`
256 /// * `[`
257 /// * `\`
258 /// * `]`
259 /// * `^`
260 /// * <code>`</code>
261 /// * `{`
262 /// * `}`
263 /// * `|`
264 ///
265 /// and all code points greater than `~` (U+007E) are escaped.
266 USERINFO;
267 /// Encode text used in the userinfo part.
268 encode_userinfo;
269 /// Write text used in the userinfo part to a mutable `String` reference and return the encoded string slice.
270 encode_userinfo_to_string;
271 /// Write text used in the userinfo part to a mutable `Vec<u8>` reference and return the encoded data slice.
272 encode_userinfo_to_vec;
273 /// Write text used in the userinfo part to a writer.
274 encode_userinfo_to_writer;
275}
276
277encode_impl! {
278 /// The following characters are escaped:
279 ///
280 /// C0 controls and,
281 ///
282 /// * SPACE
283 /// * `"`
284 /// * `#`
285 /// * `$`
286 /// * `%`
287 /// * `&`
288 /// * `+`
289 /// * `,`
290 /// * `/`
291 /// * `:`
292 /// * `;`
293 /// * `<`
294 /// * `=`
295 /// * `>`
296 /// * `?`
297 /// * `@`
298 /// * `[`
299 /// * `\`
300 /// * `]`
301 /// * `^`
302 /// * <code>`</code>
303 /// * `{`
304 /// * `}`
305 /// * `|`
306 ///
307 /// and all code points greater than `~` (U+007E) are escaped.
308 ///
309 /// It gives identical results to JavaScript's `encodeURIComponent()`.
310 COMPONENT;
311 /// Encode text used in a component.
312 encode_component;
313 /// Write text used in a component to a mutable `String` reference and return the encoded string slice.
314 encode_component_to_string;
315 /// Write text used in a component to a mutable `Vec<u8>` reference and return the encoded data slice.
316 encode_component_to_vec;
317 /// Write text used in a component to a writer.
318 encode_component_to_writer;
319}
320
321encode_impl! {
322 /// The following characters are escaped:
323 ///
324 /// C0 controls and,
325 ///
326 /// * SPACE
327 /// * `!`
328 /// * `"`
329 /// * `#`
330 /// * `$`
331 /// * `%`
332 /// * `&`
333 /// * `'`
334 /// * `(`
335 /// * `)`
336 /// * `+`
337 /// * `,`
338 /// * `/`
339 /// * `:`
340 /// * `;`
341 /// * `<`
342 /// * `=`
343 /// * `>`
344 /// * `?`
345 /// * `@`
346 /// * `[`
347 /// * `\`
348 /// * `]`
349 /// * `^`
350 /// * <code>`</code>
351 /// * `{`
352 /// * `}`
353 /// * `|`
354 /// * `~`
355 ///
356 /// and all code points greater than `~` (U+007E) are escaped.
357 X_WWW_FORM_URLENCODED;
358 /// Encode text as a www-form-urlencoded text.
359 encode_www_form_urlencoded;
360 /// Write text as a urlencoded text to a mutable `String` reference and return the encoded string slice.
361 encode_www_form_urlencoded_to_string;
362 /// Write text as a www-form-urlencoded text to a mutable `Vec<u8>` reference and return the encoded data slice.
363 encode_www_form_urlencoded_to_vec;
364 /// Write text as a www-form-urlencoded text to a writer.
365 encode_www_form_urlencoded_to_writer;
366}