contextual_encoder/
uri.rs1use std::fmt;
16
17pub fn for_uri_component(input: &str) -> String {
38 let bytes = input.as_bytes();
39 let unreserved = bytes.iter().filter(|b| is_unreserved(**b)).count();
40 let capacity = unreserved + 3 * (bytes.len() - unreserved);
41 let mut out = String::with_capacity(capacity);
42 write_uri_component(&mut out, input).expect("writing to string cannot fail");
43 out
44}
45
46pub fn write_uri_component<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
50 let bytes = input.as_bytes();
51 let mut last_written = 0;
52
53 for (i, &byte) in bytes.iter().enumerate() {
54 if !is_unreserved(byte) {
55 if last_written < i {
57 out.write_str(&input[last_written..i])?;
59 }
60 write!(out, "%{:02X}", byte)?;
61 last_written = i + 1;
62 }
63 }
64
65 if last_written < bytes.len() {
67 out.write_str(&input[last_written..])?;
68 }
69 Ok(())
70}
71
/// Returns `true` when `b` may appear in the output without escaping: an
/// ASCII letter, an ASCII digit, or one of `-`, `.`, `_`, `~` (the
/// "unreserved" characters of RFC 3986).
fn is_unreserved(b: u8) -> bool {
    b.is_ascii_alphanumeric() || matches!(b, b'-' | b'.' | b'_' | b'~')
}
76
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(input, expected)` pair encodes to the expected
    /// text when passed through `for_uri_component`.
    fn assert_encodes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(for_uri_component(input), expected);
        }
    }

    #[test]
    fn uri_component_no_encoding_needed() {
        // Inputs made only of pass-through bytes come back unchanged.
        assert_encodes(&[
            ("hello", "hello"),
            ("", ""),
            ("ABCxyz019", "ABCxyz019"),
            ("-._~", "-._~"),
        ]);
    }

    #[test]
    fn uri_component_encodes_space() {
        assert_encodes(&[("a b", "a%20b")]);
    }

    #[test]
    fn uri_component_encodes_reserved_chars() {
        assert_encodes(&[
            ("a=b", "a%3Db"),
            ("a&b", "a%26b"),
            ("a+b", "a%2Bb"),
            ("a?b", "a%3Fb"),
            ("a#b", "a%23b"),
            ("a/b", "a%2Fb"),
        ]);
    }

    #[test]
    fn uri_component_encodes_html_significant() {
        assert_encodes(&[
            ("<script>", "%3Cscript%3E"),
            (r#""quoted""#, "%22quoted%22"),
        ]);
    }

    #[test]
    fn uri_component_encodes_two_byte_utf8() {
        assert_encodes(&[("\u{00A0}", "%C2%A0"), ("é", "%C3%A9")]);
    }

    #[test]
    fn uri_component_encodes_three_byte_utf8() {
        assert_encodes(&[("\u{0800}", "%E0%A0%80"), ("世", "%E4%B8%96")]);
    }

    #[test]
    fn uri_component_encodes_four_byte_utf8() {
        assert_encodes(&[("\u{10000}", "%F0%90%80%80"), ("😀", "%F0%9F%98%80")]);
    }

    #[test]
    fn uri_component_encodes_control_chars() {
        assert_encodes(&[("\x00", "%00"), ("\x1F", "%1F"), ("\x7F", "%7F")]);
    }

    #[test]
    fn uri_component_mixed() {
        assert_encodes(&[(
            "key=hello world&foo=bar",
            "key%3Dhello%20world%26foo%3Dbar",
        )]);
    }

    #[test]
    fn uri_component_writer_variant() {
        // Exercises the streaming entry point directly instead of the
        // `String`-returning wrapper.
        let mut sink = String::new();
        write_uri_component(&mut sink, "a b").unwrap();
        assert_eq!(sink, "a%20b");
    }
}