uri_encode/lib.rs
1//! URI percent-encoding functions.
2//!
3//! This crate provides functions for percent-encoding strings according to RFC 3986,
4//! with APIs that mirror JavaScript's `encodeURI()` and `encodeURIComponent()` functions.
5//!
6//! # Overview
7//!
//! URI encoding (also called percent-encoding) replaces every byte that may not appear
//! literally with a `%` followed by two lowercase hexadecimal digits giving that byte's value.
//! For example, a space becomes `%20` and `é` (UTF-8 bytes `c3 a9`) becomes `%c3%a9`.
11//!
12//! Different contexts require different encoding rules:
13//!
14//! - [`encode_uri`] preserves URI structure characters, suitable for encoding complete URLs
15//! - [`encode_uri_component`] encodes more aggressively, suitable for path segments or query values
16//! - [`encode_query_param`] uses `+` for spaces (application/x-www-form-urlencoded style)
17//!
18//! # Examples
19//!
20//! ```
21//! use uri_encode::{encode_uri, encode_uri_component, encode_query_param};
22//!
23//! // Encoding a complete URL preserves its structure
24//! let url = "https://example.com/path?q=hello world";
25//! assert_eq!(encode_uri(url), "https://example.com/path?q=hello%20world");
26//!
27//! // Encoding a component is more aggressive
28//! let component = "hello world&foo=bar";
29//! assert_eq!(encode_uri_component(component), "hello%20world%26foo%3dbar");
30//!
31//! // Query parameters use + for spaces
32//! let param = "hello world";
33//! assert_eq!(encode_query_param(param), "hello+world");
34//! ```
35//!
36//! # Character Sets
37//!
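//! The following characters are never encoded by any function (the "unreserved" set of
//! RFC 2396, which JavaScript's `encodeURIComponent()` also leaves untouched; RFC 3986 itself
//! reclassifies `!`, `*`, `'`, `(`, `)` as reserved sub-delimiters):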
39//!
40//! - Alphanumeric: `A-Z`, `a-z`, `0-9`
41//! - Special: `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(`, `)`
42//!
43//! Additionally, [`encode_uri`] and [`encode_query_param`] preserve these reserved characters:
44//!
45//! - `;`, `,`, `/`, `?`, `:`, `@`, `&`, `=`, `+`, `$`, `#`
46//!
47//! # No Unsafe Code
48//!
49//! This crate uses `#![forbid(unsafe_code)]` and has zero dependencies.
50
51#![forbid(unsafe_code)]
52
/// Encodes a complete URI, preserving its structural characters.
///
/// This function mirrors JavaScript's `encodeURI()`: it percent-encodes every byte of the
/// UTF-8 input except those listed below. Unlike JavaScript, the hexadecimal digits of each
/// escape are emitted in lowercase (`%c3`, not `%C3`).
///
/// # Preserved Characters
///
/// The following characters are passed through unchanged:
///
/// - Unreserved: `A-Z`, `a-z`, `0-9`, `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(`, `)`
/// - Reserved: `;`, `,`, `/`, `?`, `:`, `@`, `&`, `=`, `+`, `$`, `#`
///
/// All other bytes, including `%` itself and every byte of a non-ASCII character, are
/// percent-encoded. Input that is already percent-encoded is therefore double-encoded.
///
/// # Use Cases
///
/// Use this function when you have a complete URL and want to ensure any unsafe
/// characters (like spaces) are encoded while preserving the URL's structure.
///
/// # Examples
///
/// ```
/// use uri_encode::encode_uri;
///
/// // Spaces are encoded, but URL structure is preserved
/// assert_eq!(
///     encode_uri("https://example.com/hello world?name=foo bar"),
///     "https://example.com/hello%20world?name=foo%20bar"
/// );
///
/// // Reserved characters are preserved
/// assert_eq!(encode_uri("a/b?c=d&e=f"), "a/b?c=d&e=f");
///
/// // Non-ASCII characters are encoded
/// assert_eq!(encode_uri("café"), "caf%c3%a9");
/// ```
pub fn encode_uri(s: impl AsRef<str>) -> String {
    // Lowercase hex alphabet; a nibble (0..=15) is always a valid index into it.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let input = s.as_ref();
    // The input length is only a lower bound: each escaped byte expands to three bytes.
    let mut encoded = String::with_capacity(input.len());
    for &byte in input.as_bytes() {
        match byte {
            b'A'..=b'Z'
            | b'a'..=b'z'
            | b'0'..=b'9'
            | b'-'
            | b'_'
            | b'.'
            | b'!'
            | b'~'
            | b'*'
            | b'\''
            | b'('
            | b')'
            | b';'
            | b','
            | b'/'
            | b'?'
            | b':'
            | b'@'
            | b'&'
            | b'='
            | b'+'
            | b'$'
            | b'#' => encoded.push(char::from(byte)),
            _ => {
                // Emit the escape directly instead of allocating a temporary via `format!`.
                encoded.push('%');
                encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
            }
        }
    }
    encoded
}
125
/// Encodes a URI component, such as a path segment or query value.
///
/// This function mirrors JavaScript's `encodeURIComponent()`. It performs more aggressive
/// encoding than [`encode_uri`], percent-encoding every byte of the UTF-8 input except the
/// unreserved characters listed below. Unlike JavaScript, the hexadecimal digits of each
/// escape are emitted in lowercase (`%2f`, not `%2F`).
///
/// # Preserved Characters
///
/// Only unreserved characters are passed through unchanged:
///
/// - `A-Z`, `a-z`, `0-9`, `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(`, `)`
///
/// All other characters, including reserved URI characters like `/`, `?`, `&`, `=`,
/// are percent-encoded.
///
/// # Use Cases
///
/// Use this function when encoding values that will become part of a URL:
///
/// - Path segments
/// - Query parameter names and values
/// - Fragment identifiers
/// - Any user-provided input that will be embedded in a URL
///
/// # Examples
///
/// ```
/// use uri_encode::encode_uri_component;
///
/// // Reserved characters are encoded
/// assert_eq!(encode_uri_component("a/b?c=d"), "a%2fb%3fc%3dd");
///
/// // Useful for query values
/// let search = "hello world";
/// let url = format!("https://example.com/search?q={}", encode_uri_component(search));
/// assert_eq!(url, "https://example.com/search?q=hello%20world");
///
/// // Handles special characters safely
/// assert_eq!(encode_uri_component("<script>"), "%3cscript%3e");
/// ```
pub fn encode_uri_component(s: impl AsRef<str>) -> String {
    // Lowercase hex alphabet; a nibble (0..=15) is always a valid index into it.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let input = s.as_ref();
    // The input length is only a lower bound: each escaped byte expands to three bytes.
    let mut encoded = String::with_capacity(input.len());
    for &byte in input.as_bytes() {
        match byte {
            b'A'..=b'Z'
            | b'a'..=b'z'
            | b'0'..=b'9'
            | b'-'
            | b'_'
            | b'.'
            | b'!'
            | b'~'
            | b'*'
            | b'\''
            | b'('
            | b')' => encoded.push(char::from(byte)),
            _ => {
                // Emit the escape directly instead of allocating a temporary via `format!`.
                encoded.push('%');
                encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
            }
        }
    }
    encoded
}
190
/// Encodes a query parameter name or value using `+` for spaces.
///
/// This function follows the `application/x-www-form-urlencoded` convention of writing a
/// space as `+` instead of `%20`, which is commonly used in HTML form submissions. Apart
/// from that, it preserves the same characters as [`encode_uri`]. Escapes use lowercase
/// hexadecimal digits.
///
/// # Preserved Characters
///
/// The following characters are passed through unchanged:
///
/// - Unreserved: `A-Z`, `a-z`, `0-9`, `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(`, `)`
/// - Reserved: `;`, `,`, `/`, `?`, `:`, `@`, `&`, `=`, `+`, `$`, `#`
///
/// Spaces are converted to `+`, and all other characters are percent-encoded.
///
/// # Caveats
///
/// Because `+`, `&`, and `=` are passed through unchanged, a literal `+` in the input cannot
/// be told apart from an encoded space in the output, and `&`/`=` inside a value are not
/// escaped. Use [`encode_uri_component`] when the input may contain these characters and
/// must survive a round trip.
///
/// # Use Cases
///
/// Use this function when building query strings that will be submitted as form data
/// or when you want the more compact `+` encoding for spaces.
///
/// # Examples
///
/// ```
/// use uri_encode::encode_query_param;
///
/// // Spaces become +
/// assert_eq!(encode_query_param("hello world"), "hello+world");
///
/// // Building a query string
/// let name = "John Doe";
/// let city = "New York";
/// let query = format!("name={}&city={}", encode_query_param(name), encode_query_param(city));
/// assert_eq!(query, "name=John+Doe&city=New+York");
/// ```
pub fn encode_query_param(s: impl AsRef<str>) -> String {
    // Lowercase hex alphabet; a nibble (0..=15) is always a valid index into it.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let input = s.as_ref();
    // The input length is only a lower bound: each escaped byte expands to three bytes.
    let mut encoded = String::with_capacity(input.len());
    for &byte in input.as_bytes() {
        match byte {
            // Form-style encoding: a space is written as `+`, not `%20`.
            b' ' => encoded.push('+'),
            b'A'..=b'Z'
            | b'a'..=b'z'
            | b'0'..=b'9'
            | b'-'
            | b'_'
            | b'.'
            | b'!'
            | b'~'
            | b'*'
            | b'\''
            | b'('
            | b')'
            | b';'
            | b','
            | b'/'
            | b'?'
            | b':'
            | b'@'
            | b'&'
            | b'='
            | b'+'
            | b'$'
            | b'#' => encoded.push(char::from(byte)),
            _ => {
                // Emit the escape directly instead of allocating a temporary via `format!`.
                encoded.push('%');
                encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
            }
        }
    }
    encoded
}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Behaviour of `encode_uri`: URI structure is preserved, everything else is escaped.
    mod encode_uri_tests {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!(encode_uri(""), "");
        }

        #[test]
        fn alphanumeric_unchanged() {
            assert_eq!(encode_uri("ABCxyz123"), "ABCxyz123");
        }

        #[test]
        fn unreserved_chars_unchanged() {
            assert_eq!(encode_uri("-_.!~*'()"), "-_.!~*'()");
        }

        #[test]
        fn reserved_chars_unchanged() {
            assert_eq!(encode_uri(";,/?:@&=+$#"), ";,/?:@&=+$#");
        }

        #[test]
        fn spaces_encoded() {
            assert_eq!(encode_uri("hello world"), "hello%20world");
        }

        #[test]
        fn complete_url_structure_preserved() {
            assert_eq!(
                encode_uri("https://user:pass@example.com:8080/path?q=1&r=2#frag"),
                "https://user:pass@example.com:8080/path?q=1&r=2#frag"
            );
        }

        #[test]
        fn url_with_spaces() {
            assert_eq!(
                encode_uri("https://example.com/hello world?name=foo bar"),
                "https://example.com/hello%20world?name=foo%20bar"
            );
        }

        #[test]
        fn non_ascii_encoded() {
            // Each UTF-8 byte of a non-ASCII character gets its own escape.
            assert_eq!(encode_uri("café"), "caf%c3%a9");
            assert_eq!(encode_uri("日本"), "%e6%97%a5%e6%9c%ac");
        }

        #[test]
        fn special_chars_encoded() {
            assert_eq!(encode_uri("<>\""), "%3c%3e%22");
            assert_eq!(encode_uri("{}|\\^`"), "%7b%7d%7c%5c%5e%60");
        }

        #[test]
        fn accepts_string_slice() {
            // Any `AsRef<str>` is accepted: `&str`, `&String`, and an owned `String`.
            assert_eq!(encode_uri("test value"), "test%20value");
            let s = String::from("test value");
            assert_eq!(encode_uri(&s), "test%20value");
            assert_eq!(encode_uri(s), "test%20value");
        }
    }

    /// Behaviour of `encode_uri_component`: only unreserved characters survive unescaped.
    mod encode_uri_component_tests {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!(encode_uri_component(""), "");
        }

        #[test]
        fn alphanumeric_unchanged() {
            assert_eq!(encode_uri_component("ABCxyz123"), "ABCxyz123");
        }

        #[test]
        fn unreserved_chars_unchanged() {
            assert_eq!(encode_uri_component("-_.!~*'()"), "-_.!~*'()");
        }

        #[test]
        fn reserved_chars_encoded() {
            assert_eq!(
                encode_uri_component(";,/?:@&=+$#"),
                "%3b%2c%2f%3f%3a%40%26%3d%2b%24%23"
            );
        }

        #[test]
        fn spaces_encoded() {
            assert_eq!(encode_uri_component("hello world"), "hello%20world");
        }

        #[test]
        fn path_segment() {
            assert_eq!(encode_uri_component("path/to/file"), "path%2fto%2ffile");
        }

        #[test]
        fn query_value_with_special_chars() {
            assert_eq!(encode_uri_component("a=1&b=2"), "a%3d1%26b%3d2");
        }

        #[test]
        fn html_entities_encoded() {
            assert_eq!(encode_uri_component("<script>"), "%3cscript%3e");
            assert_eq!(encode_uri_component("\"alert\""), "%22alert%22");
        }

        #[test]
        fn non_ascii_encoded() {
            assert_eq!(encode_uri_component("é"), "%c3%a9");
            assert_eq!(encode_uri_component("émoji"), "%c3%a9moji");
        }

        #[test]
        fn all_bytes_handled() {
            // Test that control characters are encoded
            assert_eq!(encode_uri_component("\x00"), "%00");
            assert_eq!(encode_uri_component("\x1f"), "%1f");
            assert_eq!(encode_uri_component("\x7f"), "%7f");
        }
    }

    /// Behaviour of `encode_query_param`: like `encode_uri`, but spaces become `+`.
    mod encode_query_param_tests {
        use super::*;

        #[test]
        fn empty_string() {
            assert_eq!(encode_query_param(""), "");
        }

        #[test]
        fn alphanumeric_unchanged() {
            assert_eq!(encode_query_param("ABCxyz123"), "ABCxyz123");
        }

        #[test]
        fn spaces_become_plus() {
            assert_eq!(encode_query_param("hello world"), "hello+world");
            // Two consecutive spaces must yield two plus signs. The input is built with
            // `repeat` so the space count is unambiguous.
            assert_eq!(encode_query_param(" ".repeat(2)), "++");
        }

        #[test]
        fn reserved_chars_unchanged() {
            assert_eq!(encode_query_param(";,/?:@&=+$#"), ";,/?:@&=+$#");
        }

        #[test]
        fn form_data_encoding() {
            assert_eq!(encode_query_param("John Doe"), "John+Doe");
            assert_eq!(encode_query_param("New York"), "New+York");
        }

        #[test]
        fn non_ascii_encoded() {
            assert_eq!(encode_query_param("naïve"), "na%c3%afve");
        }

        #[test]
        fn mixed_content() {
            assert_eq!(
                encode_query_param("value with spaces & special <chars>"),
                "value+with+spaces+&+special+%3cchars%3e"
            );
        }
    }

    /// Side-by-side checks of where the three encoders agree and where they differ.
    mod comparison_tests {
        use super::*;

        #[test]
        fn encode_uri_vs_component_slash() {
            assert_eq!(encode_uri("/"), "/");
            assert_eq!(encode_uri_component("/"), "%2f");
        }

        #[test]
        fn encode_uri_vs_component_question() {
            assert_eq!(encode_uri("?"), "?");
            assert_eq!(encode_uri_component("?"), "%3f");
        }

        #[test]
        fn encode_uri_vs_component_ampersand() {
            assert_eq!(encode_uri("&"), "&");
            assert_eq!(encode_uri_component("&"), "%26");
        }

        #[test]
        fn encode_uri_vs_query_param_space() {
            assert_eq!(encode_uri(" "), "%20");
            assert_eq!(encode_uri_component(" "), "%20");
            assert_eq!(encode_query_param(" "), "+");
        }

        #[test]
        fn all_functions_same_on_alphanumeric() {
            let s = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
            assert_eq!(encode_uri(s), s);
            assert_eq!(encode_uri_component(s), s);
            assert_eq!(encode_query_param(s), s);
        }

        #[test]
        fn all_functions_same_on_unreserved() {
            let s = "-_.!~*'()";
            assert_eq!(encode_uri(s), s);
            assert_eq!(encode_uri_component(s), s);
            assert_eq!(encode_query_param(s), s);
        }
    }

    /// Percent signs, multi-byte UTF-8, and large inputs.
    mod edge_cases {
        use super::*;

        #[test]
        fn percent_sign_encoded() {
            assert_eq!(encode_uri("%"), "%25");
            assert_eq!(encode_uri_component("%"), "%25");
            assert_eq!(encode_query_param("%"), "%25");
        }

        #[test]
        fn already_encoded_gets_double_encoded() {
            // This is correct behavior - we encode the input as-is
            assert_eq!(encode_uri("%20"), "%2520");
            assert_eq!(encode_uri_component("%20"), "%2520");
        }

        #[test]
        fn unicode_multi_byte() {
            // 3-byte UTF-8 character
            assert_eq!(encode_uri("€"), "%e2%82%ac");
            // 4-byte UTF-8 character (emoji)
            assert_eq!(encode_uri("🦀"), "%f0%9f%a6%80");
        }

        #[test]
        fn long_string() {
            let input = "a".repeat(10000);
            assert_eq!(encode_uri(&input), input);
        }

        #[test]
        fn many_encoded_chars() {
            // Every input byte is a space, so the output grows well past the initial capacity.
            let input = " ".repeat(1000);
            let expected_uri = "%20".repeat(1000);
            let expected_query = "+".repeat(1000);
            assert_eq!(encode_uri(&input), expected_uri);
            assert_eq!(encode_query_param(&input), expected_query);
        }
    }
}