/// Returns `true` when `byte` belongs to the RFC 3986 "unreserved" set:
/// an ASCII letter, an ASCII digit, or one of `-`, `.`, `_`, `~`.
fn is_unreserved(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~')
}
8
/// Converts one ASCII hexadecimal digit into its numeric value (0-15).
///
/// Accepts `0-9`, `a-f` and `A-F`; any other byte yields `None`.
fn hex_val(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        b'a'..=b'f' => Some(b - b'a' + 10),
        b'A'..=b'F' => Some(b - b'A' + 10),
        _ => None,
    }
}
18
19fn decode_unreserved_once(input: &str) -> (String, bool) {
23 let bytes = input.as_bytes();
24 let mut result = Vec::with_capacity(bytes.len());
25 let mut decoded_any = false;
26 let mut i = 0;
27
28 while i < bytes.len() {
29 if bytes[i] == b'%' && i + 2 < bytes.len() {
30 if let (Some(hi), Some(lo)) = (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) {
31 let decoded_byte = (hi << 4) | lo;
32 if is_unreserved(decoded_byte) {
33 result.push(decoded_byte);
34 decoded_any = true;
35 i += 3;
36 continue;
37 } else {
38 result.push(b'%');
40 result.push(bytes[i + 1].to_ascii_uppercase());
41 result.push(bytes[i + 2].to_ascii_uppercase());
42 i += 3;
43 continue;
44 }
45 }
46 result.push(bytes[i]);
48 i += 1;
49 } else {
50 result.push(bytes[i]);
51 i += 1;
52 }
53 }
54
55 (String::from_utf8_lossy(&result).into_owned(), decoded_any)
56}
57
58pub fn normalize_path(raw: &str) -> NormalizedComponent {
61 let mut current = raw.to_string();
62 let mut rounds = 0;
63
64 loop {
67 let (decoded, did_decode) = decode_unreserved_once(¤t);
68 current = decoded;
69 rounds += 1;
70 if !did_decode || rounds >= 3 {
71 break;
72 }
73 }
74
75 let double_encoded = detect_double_encoding(¤t);
78
79 NormalizedComponent {
80 raw: raw.to_string(),
81 normalized: current,
82 double_encoded,
83 rounds,
84 }
85}
86
87pub fn normalize_query(raw: &str) -> NormalizedComponent {
89 normalize_path(raw)
90}
91
/// Reports whether `s` contains an escaped percent sign that is itself
/// followed by two hexadecimal digits, i.e. the byte pattern `%25XX`
/// (for example `%252F`).
///
/// Strings shorter than five bytes can never match and yield `false`.
fn detect_double_encoding(s: &str) -> bool {
    // Slide a five-byte window over the input; `windows` yields nothing when
    // the input is shorter than the window, so no separate length guard is
    // needed.
    s.as_bytes().windows(5).any(|w| {
        matches!(
            w,
            [b'%', b'2', b'5', hi, lo] if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()
        )
    })
}
112
/// Outcome of normalizing the percent-encoding of one URI component.
#[derive(Debug, Clone)]
pub struct NormalizedComponent {
    /// The component exactly as it was supplied by the caller.
    pub raw: String,
    /// The component after the decoding passes have been applied.
    pub normalized: String,
    /// `true` when `normalized` still contains `%25` followed by two hex
    /// digits, which suggests the input was percent-encoded more than once.
    pub double_encoded: bool,
    /// Number of decoding passes that were executed to produce `normalized`.
    pub rounds: u32,
}
121
#[cfg(test)]
mod tests {
    use super::*;

    // --- Unreserved characters are decoded to their literal form. ---

    #[test]
    fn test_unreserved_decoded() {
        let result = normalize_path("%41");
        assert_eq!(result.normalized, "A");
    }

    // --- Reserved characters must keep their escape. ---

    #[test]
    fn test_reserved_preserved() {
        let result = normalize_path("%2F");
        assert_eq!(result.normalized, "%2F");
    }

    #[test]
    fn test_reserved_at_preserved() {
        let result = normalize_path("%40");
        assert_eq!(result.normalized, "%40");
    }

    #[test]
    fn test_reserved_colon_preserved() {
        let result = normalize_path("%3A");
        assert_eq!(result.normalized, "%3A");
    }

    #[test]
    fn test_reserved_question_preserved() {
        let result = normalize_path("%3F");
        assert_eq!(result.normalized, "%3F");
    }

    // A preserved escape has its hex digits upper-cased.
    #[test]
    fn test_hex_case_normalized() {
        let result = normalize_path("%2f");
        assert_eq!(result.normalized, "%2F");
    }

    // --- Double-encoding detection. ---

    // `%25` encodes `%`, which is not unreserved, so it stays escaped and the
    // `%25` + two hex digits pattern is flagged.
    #[test]
    fn test_double_encoding_detected() {
        let result = normalize_path("%252F");
        assert!(result.double_encoded);
    }

    #[test]
    fn test_single_level_not_double_encoded() {
        let result = normalize_path("%2F");
        assert!(!result.double_encoded);
    }

    // Unreserved and reserved escapes side by side: only the former decodes.
    #[test]
    fn test_mixed_encoding() {
        let result = normalize_path("%41%2F");
        assert_eq!(result.normalized, "A%2F");
    }

    // --- Each unreserved punctuation mark decodes. ---

    #[test]
    fn test_tilde_decoded() {
        let result = normalize_path("%7E");
        assert_eq!(result.normalized, "~");
    }

    #[test]
    fn test_hyphen_decoded() {
        let result = normalize_path("%2D");
        assert_eq!(result.normalized, "-");
    }

    #[test]
    fn test_dot_decoded() {
        let result = normalize_path("%2E");
        assert_eq!(result.normalized, ".");
    }

    #[test]
    fn test_underscore_decoded() {
        let result = normalize_path("%5F");
        assert_eq!(result.normalized, "_");
    }

    // --- Inputs without anything to decode. ---

    // The single pass that finds nothing to decode is still counted.
    #[test]
    fn test_no_encoding() {
        let result = normalize_path("/path/to/file");
        assert_eq!(result.normalized, "/path/to/file");
        assert_eq!(result.rounds, 1);
    }

    // A `%` not followed by two hex digits is passed through untouched.
    #[test]
    fn test_invalid_percent_triplet() {
        let result = normalize_path("%GG");
        assert_eq!(result.normalized, "%GG");
    }

    // `%2541` is `%25` followed by the hex digits `41`, so it is reported as
    // double-encoded.
    #[test]
    fn test_multiple_rounds() {
        let result = normalize_path("%2541");
        assert!(result.double_encoded);
    }
}