libdd_trace_obfuscation/
http.rs1use fluent_uri::UriRef;
9use percent_encoding::percent_decode_str;
10use std::fmt::Write;
11
/// Returns `true` for the ASCII characters that `obfuscate_url_string` always
/// percent-encodes, in both the path and the fragment: space, `"`, `<`, `>`,
/// `\`, `^`, `` ` ``, `{`, `|` and `}`.
///
/// NOTE(review): the name suggests this mirrors a character class of Go's URL
/// escaping — confirm against the Go implementation before changing the set.
const fn is_go_url_escape_cat1(c: char) -> bool {
    // Listed in ASCII order to make the set easy to audit.
    matches!(
        c,
        ' ' | '"' | '<' | '>' | '\\' | '^' | '`' | '{' | '|' | '}'
    )
}
20
/// Returns `true` for the path characters that are percent-encoded only
/// conditionally: `!`, `'`, `(`, `)`, `*`, `[` and `]`.
///
/// `obfuscate_url_string` escapes these only when the path also contains a
/// non-ASCII character or a character accepted by `is_go_url_escape_cat1`.
const fn is_go_url_escape_cat2_path(c: char) -> bool {
    matches!(c, '[' | ']' | '(' | ')' | '!' | '*' | '\'')
}
26
/// Returns `true` for the fragment characters that are percent-encoded only
/// conditionally: `'`, `[` and `]`.
///
/// `obfuscate_url_string` escapes these only when the fragment contains at
/// least one non-ASCII character.
const fn is_go_url_escape_cat2_fragment(c: char) -> bool {
    matches!(c, '[' | ']' | '\'')
}
32
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`.
///
/// The caller must pass a valid hex digit: any byte that is neither a decimal
/// digit nor in `a..=f` is treated as if it were an uppercase digit, so other
/// input yields a meaningless value or overflows the subtraction.
const fn hex_val(b: u8) -> u8 {
    if b.is_ascii_digit() {
        return b - b'0';
    }
    // Letters: pick the base of the matching case, uppercase being the fallback.
    let base = if matches!(b, b'a'..=b'f') {
        b'a'
    } else {
        b'A'
    };
    b - base + 10
}
40
41fn normalize_pct_encoded_unreserved(path: &str) -> String {
43 let b = path.as_bytes();
44 let mut out = String::with_capacity(path.len());
45 let mut i = 0;
46 while i < b.len() {
47 if b[i] == b'%'
48 && i + 2 < b.len()
49 && b[i + 1].is_ascii_hexdigit()
50 && b[i + 2].is_ascii_hexdigit()
51 {
52 let v = (hex_val(b[i + 1]) << 4) | hex_val(b[i + 2]);
53 if v.is_ascii_alphanumeric() || matches!(v, b'.' | b'_' | b'~') {
54 out.push(v as char);
55 } else {
56 out.push_str(&path[i..i + 3]);
57 }
58 i += 3;
59 } else {
60 out.push(b[i] as char);
61 i += 1;
62 }
63 }
64 out
65}
66
/// Appends the percent-encoded form of `c` to `out`: one `%XX` triplet
/// (uppercase hex) for every byte of the character's UTF-8 encoding.
fn encode_char(out: &mut String, c: char) {
    let mut scratch = [0u8; 4];
    let utf8 = c.encode_utf8(&mut scratch);
    utf8.bytes().for_each(|byte| {
        // The formatting result is ignored on purpose, matching the
        // best-effort append this helper has always performed.
        let _ = write!(out, "%{:02X}", byte);
    });
}
73
74fn redact_path_digits(path: &str) -> String {
75 path.split('/')
76 .map(|seg| {
77 if percent_decode_str(seg)
78 .decode_utf8_lossy()
79 .chars()
80 .any(|c| c.is_ascii_digit())
81 {
82 "?"
83 } else {
84 seg
85 }
86 })
87 .collect::<Vec<_>>()
88 .join("/")
89}
90
91pub fn obfuscate_url_string(
92 url: &str,
93 remove_query_string: bool,
94 remove_path_digits: bool,
95) -> String {
96 if url.is_empty() {
97 return String::new();
98 }
99
100 let frag_pos = url.find('#');
101 let path_query_end = frag_pos.unwrap_or(url.len());
102 let path_end = url[..path_query_end].find('?').unwrap_or(path_query_end);
103
104 if url[..path_query_end].bytes().any(|b| b < 0x20 || b == 0x7F) {
106 return if remove_query_string || remove_path_digits {
107 "?".to_string()
108 } else {
109 url.to_string()
110 };
111 }
112
113 let path = &url[..path_end];
115 let needs_full_path = path.bytes().any(|b| b > 127) || path.chars().any(is_go_url_escape_cat1);
116 let frag_has_non_ascii = frag_pos.is_some_and(|i| url[i + 1..].bytes().any(|b| b > 127));
117
118 let mut pre = String::with_capacity(url.len() * 4);
124 for c in url[..path_end].chars() {
125 if !c.is_ascii() {
126 encode_char(&mut pre, c);
127 } else if is_go_url_escape_cat1(c) || (needs_full_path && is_go_url_escape_cat2_path(c)) {
128 let _ = write!(pre, "%{:02X}", c as u8);
129 } else {
130 pre.push(c);
131 }
132 }
133 if let Some(fi) = frag_pos {
134 pre.push('#');
135 for c in url[fi + 1..].chars() {
136 if !c.is_ascii()
137 || (c as u32) < 0x20
138 || c as u32 == 0x7F
139 || c == '#'
140 || is_go_url_escape_cat1(c)
141 || (frag_has_non_ascii && is_go_url_escape_cat2_fragment(c))
142 {
143 encode_char(&mut pre, c);
144 } else {
145 pre.push(c);
146 }
147 }
148 }
149
150 let Ok(parsed) = UriRef::parse(pre.as_str()) else {
151 return if remove_query_string || remove_path_digits {
152 "?".to_string()
153 } else {
154 url.to_string()
155 };
156 };
157
158 let mut out = String::new();
159
160 if let Some(scheme) = parsed.scheme() {
161 out.push_str(&scheme.as_str().to_lowercase());
162 out.push(':');
163 }
164
165 if let Some(auth) = parsed.authority() {
166 out.push_str("//");
167 out.push_str(auth.host());
169 if let Some(port) = auth.port() {
170 out.push(':');
171 out.push_str(port.as_str());
172 }
173 let path_str = normalize_pct_encoded_unreserved(parsed.path().as_str());
174 if remove_path_digits {
175 out.push_str(&redact_path_digits(&path_str));
176 } else {
177 out.push_str(&path_str);
178 }
179 } else if let Some(scheme) = parsed.scheme() {
180 let scheme_end = scheme.as_str().len() + 1;
184 out.push_str(&url[scheme_end..path_end]);
186 } else {
187 let path_str = normalize_pct_encoded_unreserved(parsed.path().as_str());
189 if remove_path_digits {
190 out.push_str(&redact_path_digits(&path_str));
191 } else {
192 out.push_str(&path_str);
193 }
194 }
195
196 if remove_query_string {
199 if path_end < path_query_end {
200 out.push('?');
201 }
202 } else if path_end < path_query_end {
203 out.push_str(&url[path_end..path_query_end]);
205 }
206
207 if let Some(frag) = parsed.fragment() {
208 if !frag.as_str().is_empty() {
209 out.push('#');
210 out.push_str(frag.as_str());
211 }
212 }
213
214 out
215}
216
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;

    use super::obfuscate_url_string;

    // One `#[test]` function is generated per row. Columns, in order: test name,
    // `remove_query_string`, `remove_path_digits`, input URL, expected output.
    #[duplicate_item(
        test_name remove_query_string remove_path_digits input expected_output;

        // Query-string removal only.
        [remove_query_string_1] [true] [false] ["http://foo.com/"] ["http://foo.com/"];
        [remove_query_string_2] [true] [false] ["http://foo.com/123"] ["http://foo.com/123"];
        [remove_query_string_3] [true] [false]
            ["http://foo.com/id/123/page/1?search=bar&page=2"]
            ["http://foo.com/id/123/page/1?"];
        [remove_query_string_4] [true] [false]
            ["http://foo.com/id/123/page/1?search=bar&page=2#fragment"]
            ["http://foo.com/id/123/page/1?#fragment"];
        [remove_query_string_5] [true] [false]
            ["http://foo.com/id/123/page/1?blabla"]
            ["http://foo.com/id/123/page/1?"];
        [remove_query_string_6] [true] [false]
            ["http://foo.com/id/123/pa%3Fge/1?blabla"]
            ["http://foo.com/id/123/pa%3Fge/1?"];
        [remove_query_string_7] [true] [false]
            ["http://user:password@foo.com/1/2/3?q=james"]
            ["http://foo.com/1/2/3?"];

        // Path-digit redaction only.
        [remove_path_digits_1] [false] [true] ["http://foo.com/"] ["http://foo.com/"];
        [remove_path_digits_2] [false] [true]
            ["http://foo.com/name?query=search"]
            ["http://foo.com/name?query=search"];
        [remove_path_digits_3] [false] [true]
            ["http://foo.com/id/123/page/1?search=bar&page=2"]
            ["http://foo.com/id/?/page/??search=bar&page=2"];
        [remove_path_digits_4] [false] [true]
            ["http://foo.com/id/a1/page/1qwe233?search=bar&page=2#fragment-123"]
            ["http://foo.com/id/?/page/??search=bar&page=2#fragment-123"];
        [remove_path_digits_5] [false] [true] ["http://foo.com/123"] ["http://foo.com/?"];
        [remove_path_digits_6] [false] [true] ["http://foo.com/123/abcd9"] ["http://foo.com/?/?"];
        [remove_path_digits_7] [false] [true]
            ["http://foo.com/123/name/abcd9"]
            ["http://foo.com/?/name/?"];
        [remove_path_digits_8] [false] [true]
            ["http://foo.com/1%3F3/nam%3Fe/abcd9"]
            ["http://foo.com/?/nam%3Fe/?"];

        // Degenerate inputs.
        [empty_input] [false] [false] [""] [""];
        [non_printable_chars] [false] [false] ["\u{10}"] ["\u{10}"];
        [non_printable_chars_and_unicode] [true] [true] ["\u{10}ჸ"] ["?"];
        [hashtag] [true] [true] ["#"] [""];

        // Inputs found by fuzzing.
        [fuzzing_1050521893] [true] [true] ["ჸ"] ["%E1%83%B8"];
        [fuzzing_594901251] [true] [true] ["%"] ["?"];
        [fuzzing_3638045804] [true] [true] ["."] ["."];
        [fuzzing_1928485962] [true] [true] ["0"] ["?"];
        [fuzzing_4273565798] [true] [true] ["!ჸ"] ["%21%E1%83%B8"];
        [fuzzing_1457007156] [true] [true] ["!"] ["!"];
        [fuzzing_3119724369] [true] [true] [":"] ["?"];
        [fuzzing_1092426409] [true] [true] ["#ჸ"] ["#%E1%83%B8"];
        [fuzzing_1323831861] [true] [true] ["#\u{01}"] ["#%01"];
        [fuzzing_35626170] [true] [true] ["#\u{01}ჸ"] ["#%01%E1%83%B8"];
        [fuzzing_618280270] [true] [true] ["\\"] ["%5C"];
        [fuzzing_1505427946] [true] [true] ["[ჸ"] ["%5B%E1%83%B8"];
        [fuzzing_backslash_unicode] [true] [true] ["\\ჸ"] ["%5C%E1%83%B8"];
        [fuzzing_2438023093] [true] [true] ["ჸ#"] ["%E1%83%B8"];
        [fuzzing_2729083127] [true] [true] ["!#ჸ"] ["!#%E1%83%B8"];
        [fuzzing_slash_unicode] [true] [true] ["/ჸ"] ["/%E1%83%B8"];
        [fuzzing_3710129001] [true] [true] ["##"] ["#%23"];
        [fuzzing_1009954227] [true] [true] ["ჸ#\u{10}"] ["%E1%83%B8#%10"];
        [fuzzing_hash_exclamation] [true] [true] ["ჸ#!"] ["%E1%83%B8#!"];
        [fuzzing_578834728] [true] [true] ["#%"] ["?"];
        [fuzzing_3991369296] [true] [true] ["#'ჸ"] ["#%27%E1%83%B8"];
        [fuzzing_path_frag_quote] [true] [true] ["ჸ#'ჸ"] ["%E1%83%B8#%27%E1%83%B8"];
        [fuzzing_hash_excl_unicode] [true] [true] ["#!ჸ"] ["#!%E1%83%B8"];
        [fuzzing_2455396347_cat1_triggers_cat2] [true] [true] ["<!"] ["%3C%21"];
        [fuzzing_3886417401] [true] [true] ["ჸ#%\u{1}"] ["?"];

        // Parity cases.
        [parity_double_quote_cat1] [true] [true] ["\"!"] ["%22%21"];
        [parity_dot_hash_unicode] [true] [true] [".#ჸ"] [".#%E1%83%B8"];
        [parity_dot_hash] [true] [true] [".#"] ["."];
        [parity_unicode_hash_digit] [true] [true] ["ჸ#0"] ["%E1%83%B8#0"];
        [parity_scheme_empty_frag] [true] [true] ["C:#"] ["c:"];
        [parity_relative_dotdot_unicode] [true] [true] ["../ჸ"] ["../%E1%83%B8"];
        [parity_query_hash_unicode_both] [true] [true] ["?#ჸ"] ["?#%E1%83%B8"];
        [parity_query_hash_unicode_digits] [false] [true] ["?#ჸ"] ["?#%E1%83%B8"];
        [parity_excl_query_unicode] [true] [true] ["!?ჸ"] ["!?"];
        [parity_query_unicode_keep] [false] [true] ["?ჸ"] ["?ჸ"];
        [parity_space_unicode] [true] [true] [" ჸ"] ["%20%E1%83%B8"];
        [parity_unicode_query_unicode_keep] [false] [true] ["ჸ?ჸ"] ["%E1%83%B8?ჸ"];
        [parity_unicode_query_hash_both] [true] [true] ["?ჸ#ჸ"] ["?#%E1%83%B8"];
        [parity_unicode_query_empty_hash] [false] [true] ["ჸ?#"] ["%E1%83%B8?"];
        [parity_pct_unreserved_normalize] [true] [false] ["%30ჸ"] ["0%E1%83%B8"];
        [parity_unicode_query_invalid_pct] [true] [true] ["ჸ?%"] ["%E1%83%B8?"];
        [parity_not_a_url_both_false] [false] [false]
            ["this is not a valid url"]
            ["this%20is%20not%20a%20valid%20url"];
        [parity_not_a_url_both_true] [true] [true]
            ["this is not a valid url"]
            ["this%20is%20not%20a%20valid%20url"];
        [parity_disabled_userinfo] [false] [false]
            ["http://user:password@foo.com/1/2/3?q=james"]
            ["http://foo.com/1/2/3?q=james"];
        [parity_colon_both_false] [false] [false] [":"] [":"];
        [parity_pct_both_false] [false] [false] ["%"] ["%"];
        [parity_ctrl_in_scheme_both_false] [false] [false] ["C:\u{1}"] ["C:\u{1}"];
        [parity_ctrl_both_false] [false] [false] ["\u{1}"] ["\u{1}"];
        [parity_frag_curly_brace] [true] [true] ["ჸ#{ჸ"] ["%E1%83%B8#%7B%E1%83%B8"];
        [parity_opaque_url_unicode] [true] [true] ["A:ჸ"] ["a:ჸ"];
        [no_decode_dash] [false] [false]
            ["http://foo.com/foo%20bar/"]
            ["http://foo.com/foo%20bar/"];
        [parity_fuzzing_supp_unicode_frag] [true] [true]
            ["\u{91cb8}\u{9232f}झ\u{44db0}#\u{3}\n\u{5bb50}\u{925d9}\u{925d5}\u{925d5}\u{925d5}\u{925d5}䕞\u{9a70d}\u{3d2ff}\u{10ef4f}\u{87307}\u{6}\u{10ef0a}\u{10ffff}\u{ad7e5}\u{33f}筚\u{361}➑\u{2}{\u{10de13}\u{10ffff}\u{10ffff}'"]
            ["%F2%91%B2%B8%F2%92%8C%AF%E0%A4%9D%F1%84%B6%B0#%03%0A%F1%9B%AD%90%F2%92%97%99%F2%92%97%95%F2%92%97%95%F2%92%97%95%F2%92%97%95%E4%95%9E%F2%9A%9C%8D%F0%BD%8B%BF%F4%8E%BD%8F%F2%87%8C%87%06%F4%8E%BC%8A%F4%8F%BF%BF%F2%AD%9F%A5%CC%BF%E7%AD%9A%CD%A1%E2%9E%91%02%7B%F4%8D%B8%93%F4%8F%BF%BF%F4%8F%BF%BF%27"];
    )]
    #[test]
    fn test_name() {
        let result = obfuscate_url_string(input, remove_query_string, remove_path_digits);
        assert_eq!(result, expected_output);
    }
}