libdd_trace_obfuscation/
http.rs1use fluent_uri::UriRef;
9use percent_encoding::percent_decode_str;
10use std::fmt::Write;
11
/// Reports whether `c` belongs to the first escape category.
///
/// `obfuscate_url_string` percent-encodes these characters unconditionally,
/// both in the path and in the fragment. Their presence in the path also
/// switches on escaping of the path's second category.
/// NOTE(review): the name suggests parity with Go's `net/url` escaping —
/// confirm against the reference implementation before changing the set.
fn is_go_url_escape_cat1(c: char) -> bool {
    const ALWAYS_ESCAPED: [char; 10] = ['\\', '^', '{', '}', '|', '<', '>', '`', ' ', '"'];
    ALWAYS_ESCAPED.contains(&c)
}
20
/// Reports whether `c` belongs to the second escape category for paths.
///
/// `obfuscate_url_string` percent-encodes these characters only when the
/// path also contains a non-ASCII byte or a first-category character;
/// otherwise they are copied through unchanged.
fn is_go_url_escape_cat2_path(c: char) -> bool {
    "!'()*[]".contains(c)
}
26
/// Reports whether `c` belongs to the second escape category for fragments.
///
/// `obfuscate_url_string` percent-encodes these characters only when the
/// fragment also contains a non-ASCII byte.
fn is_go_url_escape_cat2_fragment(c: char) -> bool {
    c == '\'' || c == '[' || c == ']'
}
32
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its value.
///
/// The caller must pass a valid hex digit: any byte that is neither a decimal
/// digit nor in `a..=f` is treated as an uppercase digit without validation.
fn hex_val(b: u8) -> u8 {
    if b.is_ascii_digit() {
        b - b'0'
    } else if (b'a'..=b'f').contains(&b) {
        b - b'a' + 10
    } else {
        // Unchecked on purpose: callers test `is_ascii_hexdigit` first.
        b - b'A' + 10
    }
}
40
41fn normalize_pct_encoded_unreserved(path: &str) -> String {
43 let b = path.as_bytes();
44 let mut out = String::with_capacity(path.len());
45 let mut i = 0;
46 while i < b.len() {
47 if b[i] == b'%'
48 && i + 2 < b.len()
49 && b[i + 1].is_ascii_hexdigit()
50 && b[i + 2].is_ascii_hexdigit()
51 {
52 let v = (hex_val(b[i + 1]) << 4) | hex_val(b[i + 2]);
53 if v.is_ascii_alphanumeric() || matches!(v, b'.' | b'_' | b'~') {
54 out.push(v as char);
55 } else {
56 out.push_str(&path[i..i + 3]);
57 }
58 i += 3;
59 } else {
60 out.push(b[i] as char);
61 i += 1;
62 }
63 }
64 out
65}
66
/// Appends the percent-encoded form of `c` to `out`.
///
/// Every byte of the character's UTF-8 encoding is written as `%` followed
/// by two uppercase hexadecimal digits, so a multi-byte character yields one
/// `%XX` triplet per byte.
fn encode_char(out: &mut String, c: char) {
    let mut utf8 = [0u8; 4];
    c.encode_utf8(&mut utf8).bytes().for_each(|byte| {
        // The formatting result is deliberately ignored.
        let _ = write!(out, "%{byte:02X}");
    });
}
73
74fn redact_path_digits(path: &str) -> String {
75 path.split('/')
76 .map(|seg| {
77 if percent_decode_str(seg)
78 .decode_utf8_lossy()
79 .chars()
80 .any(|c| c.is_ascii_digit())
81 {
82 "?"
83 } else {
84 seg
85 }
86 })
87 .collect::<Vec<_>>()
88 .join("/")
89}
90
91pub fn obfuscate_url_string(
92 url: &str,
93 remove_query_string: bool,
94 remove_path_digits: bool,
95) -> String {
96 if url.is_empty() {
97 return String::new();
98 }
99
100 let frag_pos = url.find('#');
101 let path_query_end = frag_pos.unwrap_or(url.len());
102 let path_end = url[..path_query_end].find('?').unwrap_or(path_query_end);
103
104 if url[..path_query_end].bytes().any(|b| b < 0x20 || b == 0x7F) {
106 return if remove_query_string || remove_path_digits {
107 "?".to_string()
108 } else {
109 url.to_string()
110 };
111 }
112
113 let path = &url[..path_end];
115 let needs_full_path = path.bytes().any(|b| b > 127) || path.chars().any(is_go_url_escape_cat1);
116 let frag_has_non_ascii = frag_pos.is_some_and(|i| url[i + 1..].bytes().any(|b| b > 127));
117
118 let mut pre = String::with_capacity(url.len() * 4);
124 for c in url[..path_end].chars() {
125 if !c.is_ascii() {
126 encode_char(&mut pre, c);
127 } else if is_go_url_escape_cat1(c) || (needs_full_path && is_go_url_escape_cat2_path(c)) {
128 let _ = write!(pre, "%{:02X}", c as u8);
129 } else {
130 pre.push(c);
131 }
132 }
133 if let Some(fi) = frag_pos {
134 pre.push('#');
135 for c in url[fi + 1..].chars() {
136 if !c.is_ascii()
137 || (c as u32) < 0x20
138 || c as u32 == 0x7F
139 || c == '#'
140 || is_go_url_escape_cat1(c)
141 || (frag_has_non_ascii && is_go_url_escape_cat2_fragment(c))
142 {
143 encode_char(&mut pre, c);
144 } else {
145 pre.push(c);
146 }
147 }
148 }
149
150 let uri = match UriRef::parse(pre.as_str()) {
151 Ok(u) => u,
152 Err(_) => {
153 return if remove_query_string || remove_path_digits {
154 "?".to_string()
155 } else {
156 url.to_string()
157 };
158 }
159 };
160
161 let mut out = String::new();
162
163 if let Some(scheme) = uri.scheme() {
164 out.push_str(&scheme.as_str().to_lowercase());
165 out.push(':');
166 }
167
168 if let Some(auth) = uri.authority() {
169 out.push_str("//");
170 out.push_str(auth.host());
172 if let Some(port) = auth.port() {
173 out.push(':');
174 out.push_str(port.as_str());
175 }
176 let path_str = normalize_pct_encoded_unreserved(uri.path().as_str());
177 if remove_path_digits {
178 out.push_str(&redact_path_digits(&path_str));
179 } else {
180 out.push_str(&path_str);
181 }
182 } else if let Some(scheme) = uri.scheme() {
183 let scheme_end = scheme.as_str().len() + 1;
187 out.push_str(&url[scheme_end..path_end]);
189 } else {
190 let path_str = normalize_pct_encoded_unreserved(uri.path().as_str());
192 if remove_path_digits {
193 out.push_str(&redact_path_digits(&path_str));
194 } else {
195 out.push_str(&path_str);
196 }
197 }
198
199 if remove_query_string {
202 if path_end < path_query_end {
203 out.push('?');
204 }
205 } else if path_end < path_query_end {
206 out.push_str(&url[path_end..path_query_end]);
208 }
209
210 if let Some(frag) = uri.fragment() {
211 if !frag.as_str().is_empty() {
212 out.push('#');
213 out.push_str(frag.as_str());
214 }
215 }
216
217 out
218}
219
220#[cfg(test)]
221mod tests {
222 use duplicate::duplicate_item;
223
224 use super::obfuscate_url_string;
225
226 #[duplicate_item(
227 [
228 test_name [remove_query_string_1]
229 remove_query_string [true]
230 remove_path_digits [false]
231 input ["http://foo.com/"]
232 expected_output ["http://foo.com/"];
233 ]
234 [
235 test_name [remove_query_string_2]
236 remove_query_string [true]
237 remove_path_digits [false]
238 input ["http://foo.com/123"]
239 expected_output ["http://foo.com/123"];
240 ]
241 [
242 test_name [remove_query_string_3]
243 remove_query_string [true]
244 remove_path_digits [false]
245 input ["http://foo.com/id/123/page/1?search=bar&page=2"]
246 expected_output ["http://foo.com/id/123/page/1?"];
247 ]
248 [
249 test_name [remove_query_string_4]
250 remove_query_string [true]
251 remove_path_digits [false]
252 input ["http://foo.com/id/123/page/1?search=bar&page=2#fragment"]
253 expected_output ["http://foo.com/id/123/page/1?#fragment"];
254 ]
255 [
256 test_name [remove_query_string_5]
257 remove_query_string [true]
258 remove_path_digits [false]
259 input ["http://foo.com/id/123/page/1?blabla"]
260 expected_output ["http://foo.com/id/123/page/1?"];
261 ]
262 [
263 test_name [remove_query_string_6]
264 remove_query_string [true]
265 remove_path_digits [false]
266 input ["http://foo.com/id/123/pa%3Fge/1?blabla"]
267 expected_output ["http://foo.com/id/123/pa%3Fge/1?"];
268 ]
269 [
270 test_name [remove_query_string_7]
271 remove_query_string [true]
272 remove_path_digits [false]
273 input ["http://user:password@foo.com/1/2/3?q=james"]
274 expected_output ["http://foo.com/1/2/3?"];
275 ]
276 [
277 test_name [remove_path_digits_1]
278 remove_query_string [false]
279 remove_path_digits [true]
280 input ["http://foo.com/"]
281 expected_output ["http://foo.com/"];
282 ]
283 [
284 test_name [remove_path_digits_2]
285 remove_query_string [false]
286 remove_path_digits [true]
287 input ["http://foo.com/name?query=search"]
288 expected_output ["http://foo.com/name?query=search"];
289 ]
290 [
291 test_name [remove_path_digits_3]
292 remove_query_string [false]
293 remove_path_digits [true]
294 input ["http://foo.com/id/123/page/1?search=bar&page=2"]
295 expected_output ["http://foo.com/id/?/page/??search=bar&page=2"];
296 ]
297 [
298 test_name [remove_path_digits_4]
299 remove_query_string [false]
300 remove_path_digits [true]
301 input ["http://foo.com/id/a1/page/1qwe233?search=bar&page=2#fragment-123"]
302 expected_output ["http://foo.com/id/?/page/??search=bar&page=2#fragment-123"];
303 ]
304 [
305 test_name [remove_path_digits_5]
306 remove_query_string [false]
307 remove_path_digits [true]
308 input ["http://foo.com/123"]
309 expected_output ["http://foo.com/?"];
310 ]
311 [
312 test_name [remove_path_digits_6]
313 remove_query_string [false]
314 remove_path_digits [true]
315 input ["http://foo.com/123/abcd9"]
316 expected_output ["http://foo.com/?/?"];
317 ]
318 [
319 test_name [remove_path_digits_7]
320 remove_query_string [false]
321 remove_path_digits [true]
322 input ["http://foo.com/123/name/abcd9"]
323 expected_output ["http://foo.com/?/name/?"];
324 ]
325 [
326 test_name [remove_path_digits_8]
327 remove_query_string [false]
328 remove_path_digits [true]
329 input ["http://foo.com/1%3F3/nam%3Fe/abcd9"]
330 expected_output ["http://foo.com/?/nam%3Fe/?"];
331 ]
332 [
333 test_name [empty_input]
334 remove_query_string [false]
335 remove_path_digits [false]
336 input [""]
337 expected_output [""];
338 ]
339 [
340 test_name [non_printable_chars]
341 remove_query_string [false]
342 remove_path_digits [false]
343 input ["\u{10}"]
344 expected_output ["\u{10}"];
346 ]
347 [
348 test_name [non_printable_chars_and_unicode]
349 remove_query_string [true]
350 remove_path_digits [true]
351 input ["\u{10}ჸ"]
352 expected_output ["?"];
353 ]
354 [
355 test_name [hashtag]
356 remove_query_string [true]
357 remove_path_digits [true]
358 input ["#"]
359 expected_output [""];
360 ]
361 [
362 test_name [fuzzing_1050521893]
363 remove_query_string [true]
364 remove_path_digits [true]
365 input ["ჸ"]
366 expected_output ["%E1%83%B8"];
367 ]
368 [
369 test_name [fuzzing_594901251]
370 remove_query_string [true]
371 remove_path_digits [true]
372 input ["%"]
373 expected_output ["?"];
374 ]
375 [
376 test_name [fuzzing_3638045804]
377 remove_query_string [true]
378 remove_path_digits [true]
379 input ["."]
380 expected_output ["."];
381 ]
382 [
383 test_name [fuzzing_1928485962]
384 remove_query_string [true]
385 remove_path_digits [true]
386 input ["0"]
387 expected_output ["?"];
388 ]
389 [
390 test_name [fuzzing_4273565798]
391 remove_query_string [true]
392 remove_path_digits [true]
393 input ["!ჸ"]
394 expected_output ["%21%E1%83%B8"];
395 ]
396 [
397 test_name [fuzzing_1457007156]
398 remove_query_string [true]
399 remove_path_digits [true]
400 input ["!"]
401 expected_output ["!"];
402 ]
403 [
404 test_name [fuzzing_3119724369]
405 remove_query_string [true]
406 remove_path_digits [true]
407 input [":"]
408 expected_output ["?"];
409 ]
410 [
411 test_name [fuzzing_1092426409]
412 remove_query_string [true]
413 remove_path_digits [true]
414 input ["#ჸ"]
415 expected_output ["#%E1%83%B8"];
416 ]
417 [
418 test_name [fuzzing_1323831861]
419 remove_query_string [true]
420 remove_path_digits [true]
421 input ["#\u{01}"]
422 expected_output ["#%01"];
423 ]
424 [
425 test_name [fuzzing_35626170]
426 remove_query_string [true]
427 remove_path_digits [true]
428 input ["#\u{01}ჸ"]
429 expected_output ["#%01%E1%83%B8"];
430 ]
431 [
432 test_name [fuzzing_618280270]
433 remove_query_string [true]
434 remove_path_digits [true]
435 input ["\\"]
436 expected_output ["%5C"];
437 ]
438 [
439 test_name [fuzzing_1505427946]
440 remove_query_string [true]
441 remove_path_digits [true]
442 input ["[ჸ"]
443 expected_output ["%5B%E1%83%B8"];
444 ]
445 [
446 test_name [fuzzing_backslash_unicode]
447 remove_query_string [true]
448 remove_path_digits [true]
449 input ["\\ჸ"]
450 expected_output ["%5C%E1%83%B8"];
451 ]
452 [
453 test_name [fuzzing_2438023093]
454 remove_query_string [true]
455 remove_path_digits [true]
456 input ["ჸ#"]
457 expected_output ["%E1%83%B8"];
458 ]
459 [
460 test_name [fuzzing_2729083127]
461 remove_query_string [true]
462 remove_path_digits [true]
463 input ["!#ჸ"]
464 expected_output ["!#%E1%83%B8"];
465 ]
466 [
467 test_name [fuzzing_slash_unicode]
468 remove_query_string [true]
469 remove_path_digits [true]
470 input ["/ჸ"]
471 expected_output ["/%E1%83%B8"];
472 ]
473 [
474 test_name [fuzzing_3710129001]
475 remove_query_string [true]
476 remove_path_digits [true]
477 input ["##"]
478 expected_output ["#%23"];
479 ]
480 [
481 test_name [fuzzing_1009954227]
482 remove_query_string [true]
483 remove_path_digits [true]
484 input ["ჸ#\u{10}"]
485 expected_output ["%E1%83%B8#%10"];
486 ]
487 [
488 test_name [fuzzing_hash_exclamation]
489 remove_query_string [true]
490 remove_path_digits [true]
491 input ["ჸ#!"]
492 expected_output ["%E1%83%B8#!"];
493 ]
494 [
495 test_name [fuzzing_578834728]
496 remove_query_string [true]
497 remove_path_digits [true]
498 input ["#%"]
499 expected_output ["?"];
500 ]
501 [
502 test_name [fuzzing_3991369296]
503 remove_query_string [true]
504 remove_path_digits [true]
505 input ["#'ჸ"]
506 expected_output ["#%27%E1%83%B8"];
507 ]
508 [
509 test_name [fuzzing_path_frag_quote]
510 remove_query_string [true]
511 remove_path_digits [true]
512 input ["ჸ#'ჸ"]
513 expected_output ["%E1%83%B8#%27%E1%83%B8"];
514 ]
515 [
516 test_name [fuzzing_hash_excl_unicode]
517 remove_query_string [true]
518 remove_path_digits [true]
519 input ["#!ჸ"]
520 expected_output ["#!%E1%83%B8"];
521 ]
522 [
523 test_name [fuzzing_2455396347_cat1_triggers_cat2]
525 remove_query_string [true]
526 remove_path_digits [true]
527 input ["<!"]
528 expected_output ["%3C%21"];
529 ]
530 [
531 test_name [fuzzing_3886417401]
533 remove_query_string [true]
534 remove_path_digits [true]
535 input ["ჸ#%\u{1}"]
536 expected_output ["?"];
537 ]
538 [
539 test_name [parity_double_quote_cat1]
540 remove_query_string [true]
541 remove_path_digits [true]
542 input ["\"!"]
543 expected_output ["%22%21"];
544 ]
545 [
546 test_name [parity_dot_hash_unicode]
547 remove_query_string [true]
548 remove_path_digits [true]
549 input [".#ჸ"]
550 expected_output [".#%E1%83%B8"];
551 ]
552 [
553 test_name [parity_dot_hash]
554 remove_query_string [true]
555 remove_path_digits [true]
556 input [".#"]
557 expected_output ["."];
558 ]
559 [
560 test_name [parity_unicode_hash_digit]
561 remove_query_string [true]
562 remove_path_digits [true]
563 input ["ჸ#0"]
564 expected_output ["%E1%83%B8#0"];
565 ]
566 [
567 test_name [parity_scheme_empty_frag]
568 remove_query_string [true]
569 remove_path_digits [true]
570 input ["C:#"]
571 expected_output ["c:"];
572 ]
573 [
574 test_name [parity_relative_dotdot_unicode]
575 remove_query_string [true]
576 remove_path_digits [true]
577 input ["../ჸ"]
578 expected_output ["../%E1%83%B8"];
579 ]
580 [
581 test_name [parity_query_hash_unicode_both]
582 remove_query_string [true]
583 remove_path_digits [true]
584 input ["?#ჸ"]
585 expected_output ["?#%E1%83%B8"];
586 ]
587 [
588 test_name [parity_query_hash_unicode_digits]
589 remove_query_string [false]
590 remove_path_digits [true]
591 input ["?#ჸ"]
592 expected_output ["?#%E1%83%B8"];
593 ]
594 [
595 test_name [parity_excl_query_unicode]
596 remove_query_string [true]
597 remove_path_digits [true]
598 input ["!?ჸ"]
599 expected_output ["!?"];
600 ]
601 [
602 test_name [parity_query_unicode_keep]
603 remove_query_string [false]
604 remove_path_digits [true]
605 input ["?ჸ"]
606 expected_output ["?ჸ"];
607 ]
608 [
609 test_name [parity_space_unicode]
610 remove_query_string [true]
611 remove_path_digits [true]
612 input [" ჸ"]
613 expected_output ["%20%E1%83%B8"];
614 ]
615 [
616 test_name [parity_unicode_query_unicode_keep]
617 remove_query_string [false]
618 remove_path_digits [true]
619 input ["ჸ?ჸ"]
620 expected_output ["%E1%83%B8?ჸ"];
621 ]
622 [
623 test_name [parity_unicode_query_hash_both]
624 remove_query_string [true]
625 remove_path_digits [true]
626 input ["?ჸ#ჸ"]
627 expected_output ["?#%E1%83%B8"];
628 ]
629 [
630 test_name [parity_unicode_query_empty_hash]
631 remove_query_string [false]
632 remove_path_digits [true]
633 input ["ჸ?#"]
634 expected_output ["%E1%83%B8?"];
635 ]
636 [
637 test_name [parity_pct_unreserved_normalize]
638 remove_query_string [true]
639 remove_path_digits [false]
640 input ["%30ჸ"]
641 expected_output ["0%E1%83%B8"];
642 ]
643 [
644 test_name [parity_unicode_query_invalid_pct]
645 remove_query_string [true]
646 remove_path_digits [true]
647 input ["ჸ?%"]
648 expected_output ["%E1%83%B8?"];
649 ]
650 [
651 test_name [parity_not_a_url_both_false]
652 remove_query_string [false]
653 remove_path_digits [false]
654 input ["this is not a valid url"]
655 expected_output ["this%20is%20not%20a%20valid%20url"];
656 ]
657 [
658 test_name [parity_not_a_url_both_true]
659 remove_query_string [true]
660 remove_path_digits [true]
661 input ["this is not a valid url"]
662 expected_output ["this%20is%20not%20a%20valid%20url"];
663 ]
664 [
665 test_name [parity_disabled_userinfo]
666 remove_query_string [false]
667 remove_path_digits [false]
668 input ["http://user:password@foo.com/1/2/3?q=james"]
669 expected_output ["http://foo.com/1/2/3?q=james"];
670 ]
671 [
672 test_name [parity_colon_both_false]
673 remove_query_string [false]
674 remove_path_digits [false]
675 input [":"]
676 expected_output [":"];
677 ]
678 [
679 test_name [parity_pct_both_false]
680 remove_query_string [false]
681 remove_path_digits [false]
682 input ["%"]
683 expected_output ["%"];
684 ]
685 [
686 test_name [parity_ctrl_in_scheme_both_false]
687 remove_query_string [false]
688 remove_path_digits [false]
689 input ["C:\u{1}"]
690 expected_output ["C:\u{1}"];
691 ]
692 [
693 test_name [parity_ctrl_both_false]
694 remove_query_string [false]
695 remove_path_digits [false]
696 input ["\u{1}"]
697 expected_output ["\u{1}"];
698 ]
699 [
700 test_name [parity_frag_curly_brace]
701 remove_query_string [true]
702 remove_path_digits [true]
703 input ["ჸ#{ჸ"]
704 expected_output ["%E1%83%B8#%7B%E1%83%B8"];
705 ]
706 [
707 test_name [parity_opaque_url_unicode]
709 remove_query_string [true]
710 remove_path_digits [true]
711 input ["A:ჸ"]
712 expected_output ["a:ჸ"];
713 ]
714 [
715 test_name [no_decode_dash]
716 remove_query_string [false]
717 remove_path_digits [false]
718 input ["http://foo.com/foo%20bar/"]
719 expected_output ["http://foo.com/foo%20bar/"];
720 ]
721 [
722 test_name [parity_fuzzing_supp_unicode_frag]
725 remove_query_string [true]
726 remove_path_digits [true]
727 input ["\u{91cb8}\u{9232f}झ\u{44db0}#\u{3}\n\u{5bb50}\u{925d9}\u{925d5}\u{925d5}\u{925d5}\u{925d5}䕞\u{9a70d}\u{3d2ff}\u{10ef4f}\u{87307}\u{6}\u{10ef0a}\u{10ffff}\u{ad7e5}\u{33f}筚\u{361}➑\u{2}{\u{10de13}\u{10ffff}\u{10ffff}'"]
728 expected_output ["%F2%91%B2%B8%F2%92%8C%AF%E0%A4%9D%F1%84%B6%B0#%03%0A%F1%9B%AD%90%F2%92%97%99%F2%92%97%95%F2%92%97%95%F2%92%97%95%F2%92%97%95%E4%95%9E%F2%9A%9C%8D%F0%BD%8B%BF%F4%8E%BD%8F%F2%87%8C%87%06%F4%8E%BC%8A%F4%8F%BF%BF%F2%AD%9F%A5%CC%BF%E7%AD%9A%CD%A1%E2%9E%91%02%7B%F4%8D%B8%93%F4%8F%BF%BF%F4%8F%BF%BF%27"];
729 ]
730 )]
731 #[test]
732 fn test_name() {
733 let result = obfuscate_url_string(input, remove_query_string, remove_path_digits);
734 assert_eq!(result, expected_output);
735 }
736}