uri_encode/
lib.rs

1//! URI percent-encoding functions.
2//!
3//! This crate provides functions for percent-encoding strings according to RFC 3986,
4//! with APIs that mirror JavaScript's `encodeURI()` and `encodeURIComponent()` functions.
5//!
6//! # Overview
7//!
//! URI encoding (also called percent-encoding) replaces every byte that may not appear
//! literally with a `%` followed by two hexadecimal digits representing that byte's value.
//! Non-ASCII characters are encoded one UTF-8 byte at a time.
//! For example, a space becomes `%20` and `é` becomes `%c3%a9`.
11//!
12//! Different contexts require different encoding rules:
13//!
14//! - [`encode_uri`] preserves URI structure characters, suitable for encoding complete URLs
15//! - [`encode_uri_component`] encodes more aggressively, suitable for path segments or query values
16//! - [`encode_query_param`] uses `+` for spaces (application/x-www-form-urlencoded style)
17//!
18//! # Examples
19//!
20//! ```
21//! use uri_encode::{encode_uri, encode_uri_component, encode_query_param};
22//!
23//! // Encoding a complete URL preserves its structure
24//! let url = "https://example.com/path?q=hello world";
25//! assert_eq!(encode_uri(url), "https://example.com/path?q=hello%20world");
26//!
27//! // Encoding a component is more aggressive
28//! let component = "hello world&foo=bar";
29//! assert_eq!(encode_uri_component(component), "hello%20world%26foo%3dbar");
30//!
31//! // Query parameters use + for spaces
32//! let param = "hello world";
33//! assert_eq!(encode_query_param(param), "hello+world");
34//! ```
35//!
36//! # Character Sets
37//!
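//! The following characters are never encoded by any function. This is the "unreserved"
//! set of RFC 2396 that JavaScript's `encodeURIComponent()` uses; note that RFC 3986
//! reclassifies `!`, `*`, `'`, `(` and `)` as reserved sub-delimiters: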
39//!
40//! - Alphanumeric: `A-Z`, `a-z`, `0-9`
41//! - Special: `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(`, `)`
42//!
43//! Additionally, [`encode_uri`] and [`encode_query_param`] preserve these reserved characters:
44//!
45//! - `;`, `,`, `/`, `?`, `:`, `@`, `&`, `=`, `+`, `$`, `#`
46//!
47//! # No Unsafe Code
48//!
49//! This crate uses `#![forbid(unsafe_code)]` and has zero dependencies.
50
51#![forbid(unsafe_code)]
52
/// Percent-encodes a complete URI while leaving its structural characters intact.
///
/// The set of preserved characters is the same one JavaScript's `encodeURI()` uses.
/// Every other byte of the UTF-8 input is written as `%` followed by two hexadecimal
/// digits. The digits are emitted in **lowercase** (`%c3%a9`), whereas JavaScript emits
/// uppercase (`%C3%A9`); both forms decode to the same byte.
///
/// # Preserved Characters
///
/// The following characters are passed through unchanged:
///
/// - Alphanumeric: `A-Z`, `a-z`, `0-9`
/// - Marks: `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(`, `)`
/// - Reserved: `;`, `,`, `/`, `?`, `:`, `@`, `&`, `=`, `+`, `$`, `#`
///
/// All other bytes, including `%` itself and every byte of a non-ASCII character,
/// are percent-encoded.
///
/// # Use Cases
///
/// Use this function when you have a complete URL and want to ensure any unsafe
/// characters (like spaces) are encoded while preserving the URL's structure.
///
/// The input is treated as raw text: an existing escape such as `%20` is encoded
/// again and becomes `%2520`.
///
/// # Examples
///
/// ```
/// use uri_encode::encode_uri;
///
/// // Spaces are encoded, but URL structure is preserved
/// assert_eq!(
///     encode_uri("https://example.com/hello world?name=foo bar"),
///     "https://example.com/hello%20world?name=foo%20bar"
/// );
///
/// // Reserved characters are preserved
/// assert_eq!(encode_uri("a/b?c=d&e=f"), "a/b?c=d&e=f");
///
/// // Non-ASCII characters are encoded byte by byte
/// assert_eq!(encode_uri("café"), "caf%c3%a9");
/// ```
pub fn encode_uri(s: impl AsRef<str>) -> String {
    // Lookup table for the two hex digits of an escaped byte; avoids allocating a
    // temporary `String` through `format!` for every escaped byte.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let input: &str = s.as_ref();
    // Lower bound only: each escaped byte grows to three output bytes.
    let mut encoded = String::with_capacity(input.len());
    for &byte in input.as_bytes() {
        match byte {
            b'A'..=b'Z'
            | b'a'..=b'z'
            | b'0'..=b'9'
            | b'-'
            | b'_'
            | b'.'
            | b'!'
            | b'~'
            | b'*'
            | b'\''
            | b'('
            | b')'
            | b';'
            | b','
            | b'/'
            | b'?'
            | b':'
            | b'@'
            | b'&'
            | b'='
            | b'+'
            | b'$'
            | b'#' => encoded.push(char::from(byte)),
            _ => {
                encoded.push('%');
                // A nibble is always in 0..16, so the index is in bounds.
                encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
            }
        }
    }
    encoded
}
125
/// Percent-encodes a single URI component, such as a path segment or query value.
///
/// The set of preserved characters is the same one JavaScript's
/// `encodeURIComponent()` uses, which makes this stricter than [`encode_uri`]:
/// URI delimiters are escaped too. Every escaped byte of the UTF-8 input is written
/// as `%` followed by two hexadecimal digits. The digits are emitted in **lowercase**
/// (`%2f`), whereas JavaScript emits uppercase (`%2F`); both forms decode to the same
/// byte.
///
/// # Preserved Characters
///
/// Only these characters are passed through unchanged:
///
/// - `A-Z`, `a-z`, `0-9`, `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(`, `)`
///
/// All other bytes, including reserved URI characters like `/`, `?`, `&`, `=`,
/// the `%` sign, and every byte of a non-ASCII character, are percent-encoded.
///
/// # Use Cases
///
/// Use this function when encoding values that will become part of a URL:
///
/// - Path segments
/// - Query parameter names and values
/// - Fragment identifiers
/// - Any user-provided input that will be embedded in a URL
///
/// The input is treated as raw text: an existing escape such as `%20` is encoded
/// again and becomes `%2520`.
///
/// # Examples
///
/// ```
/// use uri_encode::encode_uri_component;
///
/// // Reserved characters are encoded
/// assert_eq!(encode_uri_component("a/b?c=d"), "a%2fb%3fc%3dd");
///
/// // Useful for query values
/// let search = "hello world";
/// let url = format!("https://example.com/search?q={}", encode_uri_component(search));
/// assert_eq!(url, "https://example.com/search?q=hello%20world");
///
/// // Handles special characters safely
/// assert_eq!(encode_uri_component("<script>"), "%3cscript%3e");
/// ```
pub fn encode_uri_component(s: impl AsRef<str>) -> String {
    // Lookup table for the two hex digits of an escaped byte; avoids allocating a
    // temporary `String` through `format!` for every escaped byte.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let input: &str = s.as_ref();
    // Lower bound only: each escaped byte grows to three output bytes.
    let mut encoded = String::with_capacity(input.len());
    for &byte in input.as_bytes() {
        match byte {
            b'A'..=b'Z'
            | b'a'..=b'z'
            | b'0'..=b'9'
            | b'-'
            | b'_'
            | b'.'
            | b'!'
            | b'~'
            | b'*'
            | b'\''
            | b'('
            | b')' => encoded.push(char::from(byte)),
            _ => {
                encoded.push('%');
                // A nibble is always in 0..16, so the index is in bounds.
                encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
            }
        }
    }
    encoded
}
190
/// Encodes text for use in a query string, writing spaces as `+`.
///
/// This function borrows the space convention of the
/// `application/x-www-form-urlencoded` format commonly used in HTML form
/// submissions: a space becomes `+` instead of `%20`. Apart from that, it preserves
/// exactly the same characters as [`encode_uri`]. Every other byte of the UTF-8 input
/// is written as `%` followed by two lowercase hexadecimal digits.
///
/// # Preserved Characters
///
/// The following characters are passed through unchanged:
///
/// - Alphanumeric: `A-Z`, `a-z`, `0-9`
/// - Marks: `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(`, `)`
/// - Reserved: `;`, `,`, `/`, `?`, `:`, `@`, `&`, `=`, `+`, `$`, `#`
///
/// Spaces are converted to `+`, and all other bytes are percent-encoded.
///
/// # Caveats
///
/// Because `&`, `=`, `+` and `#` are passed through, input containing them is **not**
/// escaped. In particular a literal `+` is indistinguishable from an encoded space:
/// both `"a+b"` and `"a b"` produce `"a+b"`. When a name or value may contain any of
/// these characters, encode it with [`encode_uri_component`] instead.
///
/// # Use Cases
///
/// Use this function when building query strings that will be submitted as form data
/// or when you want the more compact `+` encoding for spaces.
///
/// # Examples
///
/// ```
/// use uri_encode::encode_query_param;
///
/// // Spaces become +
/// assert_eq!(encode_query_param("hello world"), "hello+world");
///
/// // Building a query string
/// let name = "John Doe";
/// let city = "New York";
/// let query = format!("name={}&city={}", encode_query_param(name), encode_query_param(city));
/// assert_eq!(query, "name=John+Doe&city=New+York");
///
/// // Reserved characters, including a literal `+`, are left untouched
/// assert_eq!(encode_query_param("a+b&c=d"), "a+b&c=d");
/// ```
pub fn encode_query_param(s: impl AsRef<str>) -> String {
    // Lookup table for the two hex digits of an escaped byte; avoids allocating a
    // temporary `String` through `format!` for every escaped byte.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let input: &str = s.as_ref();
    // Lower bound only: each escaped byte grows to three output bytes.
    let mut encoded = String::with_capacity(input.len());
    for &byte in input.as_bytes() {
        match byte {
            // Form-style space; note that a literal `+` below is also kept as `+`.
            b' ' => encoded.push('+'),
            b'A'..=b'Z'
            | b'a'..=b'z'
            | b'0'..=b'9'
            | b'-'
            | b'_'
            | b'.'
            | b'!'
            | b'~'
            | b'*'
            | b'\''
            | b'('
            | b')'
            | b';'
            | b','
            | b'/'
            | b'?'
            | b':'
            | b'@'
            | b'&'
            | b'='
            | b'+'
            | b'$'
            | b'#' => encoded.push(char::from(byte)),
            _ => {
                encoded.push('%');
                // A nibble is always in 0..16, so the index is in bounds.
                encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
            }
        }
    }
    encoded
}
261
#[cfg(test)]
mod tests {
    use super::*;

    // Whole-URI encoding: structural characters survive, everything unsafe is escaped.
    mod encode_uri_tests {
        use super::*;

        #[test]
        fn empty_string() {
            let encoded = encode_uri("");
            assert_eq!(encoded, "");
        }

        #[test]
        fn alphanumeric_unchanged() {
            let input = "ABCxyz123";
            assert_eq!(encode_uri(input), input);
        }

        #[test]
        fn unreserved_chars_unchanged() {
            let input = "-_.!~*'()";
            assert_eq!(encode_uri(input), input);
        }

        #[test]
        fn reserved_chars_unchanged() {
            let input = ";,/?:@&=+$#";
            assert_eq!(encode_uri(input), input);
        }

        #[test]
        fn spaces_encoded() {
            let encoded = encode_uri("hello world");
            assert_eq!(encoded, "hello%20world");
        }

        #[test]
        fn complete_url_structure_preserved() {
            // Nothing in this URL needs escaping, so it must round-trip verbatim.
            let url = "https://user:pass@example.com:8080/path?q=1&r=2#frag";
            assert_eq!(encode_uri(url), url);
        }

        #[test]
        fn url_with_spaces() {
            let url = "https://example.com/hello world?name=foo bar";
            let expected = "https://example.com/hello%20world?name=foo%20bar";
            assert_eq!(encode_uri(url), expected);
        }

        #[test]
        fn non_ascii_encoded() {
            let cases = [("café", "caf%c3%a9"), ("日本", "%e6%97%a5%e6%9c%ac")];
            for &(input, expected) in cases.iter() {
                assert_eq!(encode_uri(input), expected);
            }
        }

        #[test]
        fn special_chars_encoded() {
            let cases = [
                ("<>\"", "%3c%3e%22"),
                ("{}|\\^`", "%7b%7d%7c%5c%5e%60"),
            ];
            for &(input, expected) in cases.iter() {
                assert_eq!(encode_uri(input), expected);
            }
        }

        #[test]
        fn accepts_string_slice() {
            // Exercises both a borrowed `&String` and an owned `String` argument.
            let owned = String::from("test value");
            let expected = "test%20value";
            assert_eq!(encode_uri(&owned), expected);
            assert_eq!(encode_uri(owned), expected);
        }
    }

    // Component encoding: only alphanumerics and the mark characters survive.
    mod encode_uri_component_tests {
        use super::*;

        #[test]
        fn empty_string() {
            let encoded = encode_uri_component("");
            assert_eq!(encoded, "");
        }

        #[test]
        fn alphanumeric_unchanged() {
            let input = "ABCxyz123";
            assert_eq!(encode_uri_component(input), input);
        }

        #[test]
        fn unreserved_chars_unchanged() {
            let input = "-_.!~*'()";
            assert_eq!(encode_uri_component(input), input);
        }

        #[test]
        fn reserved_chars_encoded() {
            let encoded = encode_uri_component(";,/?:@&=+$#");
            assert_eq!(encoded, "%3b%2c%2f%3f%3a%40%26%3d%2b%24%23");
        }

        #[test]
        fn spaces_encoded() {
            let encoded = encode_uri_component("hello world");
            assert_eq!(encoded, "hello%20world");
        }

        #[test]
        fn path_segment() {
            let encoded = encode_uri_component("path/to/file");
            assert_eq!(encoded, "path%2fto%2ffile");
        }

        #[test]
        fn query_value_with_special_chars() {
            let encoded = encode_uri_component("a=1&b=2");
            assert_eq!(encoded, "a%3d1%26b%3d2");
        }

        #[test]
        fn html_entities_encoded() {
            let cases = [
                ("<script>", "%3cscript%3e"),
                ("\"alert\"", "%22alert%22"),
            ];
            for &(input, expected) in cases.iter() {
                assert_eq!(encode_uri_component(input), expected);
            }
        }

        #[test]
        fn non_ascii_encoded() {
            let cases = [("é", "%c3%a9"), ("émoji", "%c3%a9moji")];
            for &(input, expected) in cases.iter() {
                assert_eq!(encode_uri_component(input), expected);
            }
        }

        #[test]
        fn all_bytes_handled() {
            // Control characters at both ends of the ASCII range must be escaped.
            let cases = [("\x00", "%00"), ("\x1f", "%1f"), ("\x7f", "%7f")];
            for &(input, expected) in cases.iter() {
                assert_eq!(encode_uri_component(input), expected);
            }
        }
    }

    // Query-parameter encoding: like `encode_uri`, except that a space becomes `+`.
    mod encode_query_param_tests {
        use super::*;

        #[test]
        fn empty_string() {
            let encoded = encode_query_param("");
            assert_eq!(encoded, "");
        }

        #[test]
        fn alphanumeric_unchanged() {
            let input = "ABCxyz123";
            assert_eq!(encode_query_param(input), input);
        }

        #[test]
        fn spaces_become_plus() {
            let cases = [("hello world", "hello+world"), ("  ", "++")];
            for &(input, expected) in cases.iter() {
                assert_eq!(encode_query_param(input), expected);
            }
        }

        #[test]
        fn reserved_chars_unchanged() {
            let input = ";,/?:@&=+$#";
            assert_eq!(encode_query_param(input), input);
        }

        #[test]
        fn form_data_encoding() {
            let cases = [("John Doe", "John+Doe"), ("New York", "New+York")];
            for &(input, expected) in cases.iter() {
                assert_eq!(encode_query_param(input), expected);
            }
        }

        #[test]
        fn non_ascii_encoded() {
            let encoded = encode_query_param("naïve");
            assert_eq!(encoded, "na%c3%afve");
        }

        #[test]
        fn mixed_content() {
            let input = "value with spaces & special <chars>";
            let expected = "value+with+spaces+&+special+%3cchars%3e";
            assert_eq!(encode_query_param(input), expected);
        }
    }

    // Side-by-side checks of how the three encoders treat the same input.
    mod comparison_tests {
        use super::*;

        #[test]
        fn encode_uri_vs_component_slash() {
            let slash = "/";
            assert_eq!(encode_uri(slash), "/");
            assert_eq!(encode_uri_component(slash), "%2f");
        }

        #[test]
        fn encode_uri_vs_component_question() {
            let question = "?";
            assert_eq!(encode_uri(question), "?");
            assert_eq!(encode_uri_component(question), "%3f");
        }

        #[test]
        fn encode_uri_vs_component_ampersand() {
            let ampersand = "&";
            assert_eq!(encode_uri(ampersand), "&");
            assert_eq!(encode_uri_component(ampersand), "%26");
        }

        #[test]
        fn encode_uri_vs_query_param_space() {
            let space = " ";
            assert_eq!(encode_uri(space), "%20");
            assert_eq!(encode_uri_component(space), "%20");
            assert_eq!(encode_query_param(space), "+");
        }

        #[test]
        fn all_functions_same_on_alphanumeric() {
            let alphanumeric = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
            assert_eq!(encode_uri(alphanumeric), alphanumeric);
            assert_eq!(encode_uri_component(alphanumeric), alphanumeric);
            assert_eq!(encode_query_param(alphanumeric), alphanumeric);
        }

        #[test]
        fn all_functions_same_on_unreserved() {
            let marks = "-_.!~*'()";
            assert_eq!(encode_uri(marks), marks);
            assert_eq!(encode_uri_component(marks), marks);
            assert_eq!(encode_query_param(marks), marks);
        }
    }

    // Inputs that tend to trip encoders up: `%`, multi-byte UTF-8, large inputs.
    mod edge_cases {
        use super::*;

        #[test]
        fn percent_sign_encoded() {
            let percent = "%";
            let expected = "%25";
            assert_eq!(encode_uri(percent), expected);
            assert_eq!(encode_uri_component(percent), expected);
            assert_eq!(encode_query_param(percent), expected);
        }

        #[test]
        fn already_encoded_gets_double_encoded() {
            // The encoders treat input as raw text, so an existing escape is escaped again.
            let pre_encoded = "%20";
            let expected = "%2520";
            assert_eq!(encode_uri(pre_encoded), expected);
            assert_eq!(encode_uri_component(pre_encoded), expected);
        }

        #[test]
        fn unicode_multi_byte() {
            let cases = [
                // 3-byte UTF-8 character
                ("€", "%e2%82%ac"),
                // 4-byte UTF-8 character (emoji)
                ("🦀", "%f0%9f%a6%80"),
            ];
            for &(input, expected) in cases.iter() {
                assert_eq!(encode_uri(input), expected);
            }
        }

        #[test]
        fn long_string() {
            let unchanged = "a".repeat(10000);
            assert_eq!(encode_uri(&unchanged), unchanged);
        }

        #[test]
        fn many_encoded_chars() {
            let count = 1000;
            let spaces = " ".repeat(count);
            assert_eq!(encode_uri(&spaces), "%20".repeat(count));
            assert_eq!(encode_query_param(&spaces), "+".repeat(count));
        }
    }
}