libdd_trace_obfuscation/
http.rs

1// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use percent_encoding::percent_decode_str;
5use url::Url;
6
7pub fn obfuscate_url_string(
8    url: &str,
9    remove_query_string: bool,
10    remove_path_digits: bool,
11) -> String {
12    let mut parsed_url = match Url::parse(url) {
13        Ok(res) => res,
14        Err(_) => return "?".to_string(),
15    };
16
17    // remove username & password
18    parsed_url.set_username("").unwrap_or_default();
19    parsed_url.set_password(Some("")).unwrap_or_default();
20
21    if remove_query_string && parsed_url.query().is_some() {
22        parsed_url.set_query(Some(""));
23    }
24
25    if !remove_path_digits {
26        return parsed_url.to_string();
27    }
28
29    // remove path digits
30    let mut split_url: Vec<&str> = parsed_url.path().split('/').collect();
31    let mut changed = false;
32    for segment in split_url.iter_mut() {
33        // we don't want to redact any HTML encodings
34        #[allow(clippy::unwrap_used)]
35        let decoded = percent_decode_str(segment).decode_utf8().unwrap();
36        if decoded.chars().any(|c| char::is_ascii_digit(&c)) {
37            *segment = "/REDACTED/";
38            changed = true;
39        }
40    }
41    if changed {
42        parsed_url.set_path(&split_url.join("/"));
43    }
44
45    parsed_url.to_string().replace("/REDACTED/", "?")
46}
47
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;

    use super::obfuscate_url_string;

    // Table-driven tests: each bracketed group below is expanded by `duplicate_item` into its own
    // `#[test]` function named after `test_name`. The function calls `obfuscate_url_string` with
    // the group's `input` and flags and compares the result with `expected_output`.
    //
    // Besides the per-flag cases, the table covers an input that fails URL parsing and a call
    // with both flags enabled at once.
    #[duplicate_item(
        [
            test_name           [remove_query_string_1]
            remove_query_string [true]
            remove_path_digits  [false]
            input               ["http://foo.com/"]
            expected_output     ["http://foo.com/"];
        ]
        [
            test_name           [remove_query_string_2]
            remove_query_string [true]
            remove_path_digits  [false]
            input               ["http://foo.com/123"]
            expected_output     ["http://foo.com/123"];
        ]
        [
            test_name           [remove_query_string_3]
            remove_query_string [true]
            remove_path_digits  [false]
            input               ["http://foo.com/id/123/page/1?search=bar&page=2"]
            expected_output     ["http://foo.com/id/123/page/1?"];
        ]
        [
            test_name           [remove_query_string_4]
            remove_query_string [true]
            remove_path_digits  [false]
            input               ["http://foo.com/id/123/page/1?search=bar&page=2#fragment"]
            expected_output     ["http://foo.com/id/123/page/1?#fragment"];
        ]
        [
            test_name           [remove_query_string_5]
            remove_query_string [true]
            remove_path_digits  [false]
            input               ["http://foo.com/id/123/page/1?blabla"]
            expected_output     ["http://foo.com/id/123/page/1?"];
        ]
        [
            test_name           [remove_query_string_6]
            remove_query_string [true]
            remove_path_digits  [false]
            input               ["http://foo.com/id/123/pa%3Fge/1?blabla"]
            expected_output     ["http://foo.com/id/123/pa%3Fge/1?"];
        ]
        [
            test_name           [remove_query_string_7]
            remove_query_string [true]
            remove_path_digits  [false]
            input               ["http://user:password@foo.com/1/2/3?q=james"]
            expected_output     ["http://foo.com/1/2/3?"];
        ]
        [
            test_name           [remove_path_digits_1]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://foo.com/"]
            expected_output     ["http://foo.com/"];
        ]
        [
            test_name           [remove_path_digits_2]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://foo.com/name?query=search"]
            expected_output     ["http://foo.com/name?query=search"];
        ]
        [
            test_name           [remove_path_digits_3]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://foo.com/id/123/page/1?search=bar&page=2"]
            expected_output     ["http://foo.com/id/?/page/??search=bar&page=2"];
        ]
        [
            test_name           [remove_path_digits_4]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://foo.com/id/a1/page/1qwe233?search=bar&page=2#fragment-123"]
            expected_output     ["http://foo.com/id/?/page/??search=bar&page=2#fragment-123"];
        ]
        [
            test_name           [remove_path_digits_5]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://foo.com/123"]
            expected_output     ["http://foo.com/?"];
        ]
        [
            test_name           [remove_path_digits_6]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://foo.com/123/abcd9"]
            expected_output     ["http://foo.com/?/?"];
        ]
        [
            test_name           [remove_path_digits_7]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://foo.com/123/name/abcd9"]
            expected_output     ["http://foo.com/?/name/?"];
        ]
        [
            test_name           [remove_path_digits_8]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://foo.com/1%3F3/nam%3Fe/abcd9"]
            expected_output     ["http://foo.com/?/nam%3Fe/?"];
        ]
        [
            test_name           [remove_path_digits_9]
            remove_query_string [false]
            remove_path_digits  [true]
            input               ["http://user:password@foo.com/1/2/3?q=james"]
            expected_output     ["http://foo.com/?/?/??q=james"];
        ]
        [
            test_name           [remove_query_string_and_path_digits_1]
            remove_query_string [true]
            remove_path_digits  [true]
            input               ["http://foo.com/id/123/page/1?search=bar&page=2"]
            expected_output     ["http://foo.com/id/?/page/??"];
        ]
        [
            test_name           [invalid_url_1]
            remove_query_string [true]
            remove_path_digits  [true]
            input               ["not a url"]
            expected_output     ["?"];
        ]
    )]
    #[test]
    fn test_name() {
        let result = obfuscate_url_string(input, remove_query_string, remove_path_digits);
        assert_eq!(result, expected_output);
    }
}