/// Reports whether `byte` is one of the characters this module treats as
/// "unreserved": an ASCII letter, an ASCII digit, or one of `-`, `.`, `_`, `~`.
///
/// Percent-encoded triplets whose value satisfies this predicate are the only
/// ones that get decoded during normalization.
fn is_unreserved(byte: u8) -> bool {
    // The four punctuation marks accepted alongside letters and digits.
    const MARKS: &[u8] = b"-._~";

    byte.is_ascii_alphanumeric() || MARKS.contains(&byte)
}
10
/// Converts a single ASCII hexadecimal digit to its numeric value.
///
/// Accepts `0-9`, `a-f` and `A-F`; every other byte yields `None`.
fn hex_val(b: u8) -> Option<u8> {
    // `to_digit(16)` only recognises ASCII hex digits, so bytes >= 0x80
    // (mapped to U+0080..=U+00FF by `char::from`) are rejected as well.
    // The digit is always < 16, so narrowing to `u8` cannot truncate.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
20
21fn decode_unreserved_once(input: &str) -> (String, bool) {
25 let bytes = input.as_bytes();
26 let mut result = Vec::with_capacity(bytes.len());
27 let mut decoded_any = false;
28 let mut i = 0;
29
30 while i < bytes.len() {
31 if bytes[i] == b'%' && i + 2 < bytes.len() {
32 if let (Some(hi), Some(lo)) = (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) {
33 let decoded_byte = (hi << 4) | lo;
34 if is_unreserved(decoded_byte) {
35 result.push(decoded_byte);
36 decoded_any = true;
37 i += 3;
38 continue;
39 } else {
40 result.push(b'%');
42 result.push(bytes[i + 1].to_ascii_uppercase());
43 result.push(bytes[i + 2].to_ascii_uppercase());
44 i += 3;
45 continue;
46 }
47 }
48 result.push(bytes[i]);
50 i += 1;
51 } else {
52 result.push(bytes[i]);
53 i += 1;
54 }
55 }
56
57 (String::from_utf8_lossy(&result).into_owned(), decoded_any)
58}
59
60pub fn normalize_path(raw: &str) -> NormalizedComponent {
63 let mut current = raw.to_string();
64 let mut rounds = 0;
65
66 loop {
70 let (decoded, did_decode) = decode_unreserved_once(¤t);
71 current = decoded;
72 rounds += 1;
73 if !did_decode || rounds >= 3 {
74 break;
75 }
76 }
77
78 let double_encoded = detect_double_encoding(¤t);
79
80 NormalizedComponent {
81 raw: raw.to_string(),
82 normalized: current,
83 double_encoded,
84 rounds,
85 }
86}
87
/// Normalizes a raw query component.
///
/// This forwards directly to [`normalize_path`]; no query-specific handling
/// is applied, so the result is exactly what `normalize_path(raw)` returns.
pub fn normalize_query(raw: &str) -> NormalizedComponent {
    normalize_path(raw)
}
92
93fn detect_double_encoding(s: &str) -> bool {
95 let bytes = s.as_bytes();
96 if bytes.len() < 5 {
97 return false;
98 }
99 let mut i = 0;
100 while i + 4 < bytes.len() {
101 if bytes[i] == b'%'
102 && bytes[i + 1] == b'2'
103 && bytes[i + 2] == b'5'
104 && hex_val(bytes[i + 3]).is_some()
105 && hex_val(bytes[i + 4]).is_some()
106 {
107 return true;
108 }
109 i += 1;
110 }
111 false
112}
113
/// Outcome of normalizing one URI component.
///
/// Derives `PartialEq`/`Eq` so results can be compared directly, for example
/// with `assert_eq!` or when deduplicating.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NormalizedComponent {
    /// The component as it was passed in, before any decoding.
    pub raw: String,
    /// The component after the decoding passes have run.
    pub normalized: String,
    /// `true` when `normalized` still contains `%25` followed by two hex
    /// digits.
    pub double_encoded: bool,
    /// Number of decoding passes executed, counting the last one even when it
    /// decoded nothing.
    pub rounds: u32,
}
122
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the normalized form of `raw` as a path component.
    fn normalized(raw: &str) -> String {
        normalize_path(raw).normalized
    }

    /// Shorthand for the double-encoding flag of `raw` as a path component.
    fn is_double_encoded(raw: &str) -> bool {
        normalize_path(raw).double_encoded
    }

    // --- unreserved characters are decoded -------------------------------

    #[test]
    fn test_unreserved_decoded() {
        assert_eq!(normalized("%41"), "A");
    }

    // --- reserved characters stay encoded --------------------------------

    #[test]
    fn test_reserved_preserved() {
        assert_eq!(normalized("%2F"), "%2F");
    }

    #[test]
    fn test_reserved_at_preserved() {
        assert_eq!(normalized("%40"), "%40");
    }

    #[test]
    fn test_reserved_colon_preserved() {
        assert_eq!(normalized("%3A"), "%3A");
    }

    #[test]
    fn test_reserved_question_preserved() {
        assert_eq!(normalized("%3F"), "%3F");
    }

    // Lower-case hex digits in a preserved triplet are upper-cased.
    #[test]
    fn test_hex_case_normalized() {
        assert_eq!(normalized("%2f"), "%2F");
    }

    // --- double-encoding detection ---------------------------------------

    #[test]
    fn test_double_encoding_detected() {
        assert!(is_double_encoded("%252F"));
    }

    #[test]
    fn test_single_level_not_double_encoded() {
        assert!(!is_double_encoded("%2F"));
    }

    // --- mixed and individual unreserved marks ---------------------------

    #[test]
    fn test_mixed_encoding() {
        assert_eq!(normalized("%41%2F"), "A%2F");
    }

    #[test]
    fn test_tilde_decoded() {
        assert_eq!(normalized("%7E"), "~");
    }

    #[test]
    fn test_hyphen_decoded() {
        assert_eq!(normalized("%2D"), "-");
    }

    #[test]
    fn test_dot_decoded() {
        assert_eq!(normalized("%2E"), ".");
    }

    #[test]
    fn test_underscore_decoded() {
        assert_eq!(normalized("%5F"), "_");
    }

    // --- inputs that need no decoding ------------------------------------

    // A plain path is returned unchanged after exactly one pass.
    #[test]
    fn test_no_encoding() {
        let component = normalize_path("/path/to/file");
        assert_eq!(component.normalized, "/path/to/file");
        assert_eq!(component.rounds, 1);
    }

    // '%' followed by non-hex characters is copied through untouched.
    #[test]
    fn test_invalid_percent_triplet() {
        assert_eq!(normalized("%GG"), "%GG");
    }

    #[test]
    fn test_multiple_rounds() {
        assert!(is_double_encoded("%2541"));
    }
}