Skip to main content

libdd_trace_obfuscation/
replacer.rs

1// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use libdd_trace_protobuf::pb;
5use regex::Regex;
6use serde::Deserialize;
7
8#[derive(Deserialize)]
9struct RawReplaceRule {
10    name: String,
11    pattern: String,
12    repl: String,
13}
14
15impl PartialEq for ReplaceRule {
16    fn eq(&self, other: &Self) -> bool {
17        self.name == other.name && self.repl == other.repl && self.re.as_str() == other.re.as_str()
18    }
19}
20
21#[derive(Debug, Clone)]
22pub struct ReplaceRule {
23    // name specifies the name of the tag that the replace rule addresses. However,
24    // some exceptions apply such as:
25    // * "resource.name" will target the resource
26    // * "*" will target all tags and the resource
27    name: String,
28
29    // re holds the regex pattern for matching.
30    re: regex::Regex,
31
32    // repl specifies the replacement string to be used when Pattern matches.
33    repl: String,
34
35    // does the replacement pattern contain references to the capture groups
36    no_expansion: bool,
37}
38
39impl ReplaceRule {
40    fn apply(&self, tag_value: &mut String, scratch_space: &mut String) {
41        replace_all(
42            &self.re,
43            &self.repl,
44            self.no_expansion,
45            tag_value,
46            scratch_space,
47        )
48    }
49}
50
51/// replace_trace_tags replaces the tag values of all spans within a trace with a given set of
52/// rules.
53pub fn replace_trace_tags(trace: &mut [pb::Span], rules: &[ReplaceRule]) {
54    let mut scratch_space = String::new();
55    for span in trace.iter_mut() {
56        replace_span_tags(span, rules, &mut scratch_space);
57    }
58}
59
60/// replace_span_tags replaces the tag values of a span with a given set of rules.
61pub fn replace_span_tags(span: &mut pb::Span, rules: &[ReplaceRule], scratch_space: &mut String) {
62    for rule in rules {
63        match rule.name.as_ref() {
64            "*" => {
65                for (_, tag_value) in span.meta.iter_mut() {
66                    rule.apply(tag_value, scratch_space);
67                }
68            }
69            "resource.name" => {
70                rule.apply(&mut span.resource, scratch_space);
71            }
72            _ => {
73                if let Some(tag_value) = span.meta.get_mut(&rule.name) {
74                    rule.apply(tag_value, scratch_space);
75                }
76            }
77        }
78    }
79}
80
81/// parse_rules_from_string takes an array of rules, represented as an array of length 3 arrays
82/// holding the tag name, regex pattern, and replacement string as strings.
83/// * returns a vec of ReplaceRules
84pub fn parse_rules_from_string(
85    // rules: &'a [[&'a str; 3]],
86    rules: &str,
87) -> anyhow::Result<Vec<ReplaceRule>> {
88    let raw_rules = serde_json::from_str::<Vec<RawReplaceRule>>(rules)?;
89
90    let mut vec: Vec<ReplaceRule> = Vec::with_capacity(rules.len());
91
92    // for [name, pattern, repl] in rules {
93    for raw_rule in raw_rules {
94        let compiled_regex = match Regex::new(&raw_rule.pattern) {
95            Ok(res) => res,
96            Err(err) => {
97                anyhow::bail!("Obfuscator Error: Error while parsing rule: {}", err)
98            }
99        };
100        let no_expansion = regex::Replacer::no_expansion(&mut &raw_rule.repl).is_some();
101        vec.push(ReplaceRule {
102            name: raw_rule.name,
103            re: compiled_regex,
104            repl: raw_rule.repl,
105            no_expansion,
106        });
107    }
108    Ok(vec)
109}
110
111/// Mutate the haystack by changing all occurences of the regex by the `replace` parameter
112/// using the scratch space provided
113///
114/// Taken from regex::replacen to use a reusable scratch space instead of allocating a new String
115/// https://docs.rs/regex/1.10.2/src/regex/regex/string.rs.html#890-944
116fn replace_all(
117    re: &Regex,
118    mut replace: &str,
119    no_expansion: bool,
120    haystack: &mut String,
121    scratch_space: &mut String,
122) {
123    // If we know that the replacement doesn't have any capture expansions,
124    // then we can use the fast path. The fast path can make a tremendous
125    // difference:
126    //
127    //   1) We use `find_iter` instead of `captures_iter`. Not asking for captures generally makes
128    //      the regex engines faster.
129    //   2) We don't need to look up all of the capture groups and do replacements inside the
130    //      replacement string. We just push it at each match and be done with it.
131    if no_expansion {
132        let mut it = re.find_iter(haystack).peekable();
133        if it.peek().is_none() {
134            return;
135        }
136        scratch_space.reserve(haystack.len());
137        let mut last_match = 0;
138        for m in it {
139            scratch_space.push_str(&haystack[last_match..m.start()]);
140            scratch_space.push_str(replace);
141            last_match = m.end();
142        }
143        scratch_space.push_str(&haystack[last_match..]);
144    } else {
145        // The slower path, which we use if the replacement may need access to
146        // capture groups.
147        let mut it = re.captures_iter(haystack).peekable();
148        if it.peek().is_none() {
149            return;
150        }
151        scratch_space.reserve(haystack.len());
152        let mut last_match = 0;
153        for cap in it {
154            // unwrap on 0 is OK because captures only reports matches
155            #[allow(clippy::unwrap_used)]
156            let m = cap.get(0).unwrap();
157            scratch_space.push_str(&haystack[last_match..m.start()]);
158            regex::Replacer::replace_append(&mut replace, &cap, scratch_space);
159            last_match = m.end();
160        }
161        scratch_space.push_str(&haystack[last_match..]);
162    }
163    std::mem::swap(scratch_space, haystack);
164    scratch_space.truncate(0);
165}
166
#[cfg(test)]
mod tests {

    use crate::replacer;
    use duplicate::duplicate_item;
    use libdd_trace_protobuf::pb;
    use std::collections::HashMap;

    /// Builds a span with fixed dummy fields and the given tags. The special key
    /// `"resource.name"` sets the span's resource; every other key is stored in `meta`.
    fn new_test_span_with_tags(tags: HashMap<&str, &str>) -> pb::Span {
        let mut span = pb::Span {
            duration: 10000000,
            error: 0,
            resource: "GET /some/raclette".to_string(),
            service: "django".to_string(),
            name: "django.controller".to_string(),
            span_id: 123,
            start: 1448466874000000000,
            trace_id: 424242,
            meta: HashMap::new(),
            metrics: HashMap::from([("cheese_weight".to_string(), 100000.0)]),
            parent_id: 1111,
            r#type: "http".to_string(),
            meta_struct: HashMap::new(),
            span_links: vec![],
            span_events: vec![],
        };
        for (key, val) in tags {
            match key {
                "resource.name" => {
                    span.resource = val.to_string();
                }
                _ => {
                    span.meta.insert(key.to_string(), val.to_string());
                }
            }
        }
        span
    }

    // Each substitution group below generates one test: the rules are parsed, applied to a
    // two-span trace built from `input`, and both spans are compared against `expected`.
    #[duplicate_item(
        [
        test_name   [test_replace_tags]
        rules       [r#"[
                        {"name": "http.url", "pattern": "(token/)([^/]*)", "repl": "${1}?"},
                        {"name": "http.url", "pattern": "guid", "repl": "[REDACTED]"},
                        {"name": "custom.tag", "pattern": "(/foo/bar/).*", "repl": "${1}extra"}
                    ]"#]
        input       [
                        HashMap::from([
                            ("http.url", "some/guid/token/abcdef/abc"),
                            ("custom.tag", "/foo/bar/foo"),
                        ])
                    ]
        expected    [
                        HashMap::from([
                            ("http.url", "some/[REDACTED]/token/?/abc"),
                            ("custom.tag", "/foo/bar/extra"),
                        ])
                    ];
        ]
        [
        test_name   [test_replace_tags_with_exceptions]
        rules       [r#"[
                        {"name": "*", "pattern": "(token/)([^/]*)", "repl": "${1}?"},
                        {"name": "*", "pattern": "this", "repl": "that"},
                        {"name": "http.url", "pattern": "guid", "repl": "[REDACTED]"},
                        {"name": "custom.tag", "pattern": "(/foo/bar/).*", "repl": "${1}extra"},
                        {"name": "resource.name", "pattern": "prod", "repl": "stage"}
                    ]"#]
        input       [
                        HashMap::from([
                            ("resource.name", "this is prod"),
                            ("http.url", "some/[REDACTED]/token/abcdef/abc"),
                            ("other.url", "some/guid/token/abcdef/abc"),
                            ("custom.tag", "/foo/bar/foo"),
                        ])
                    ]
        expected    [
                        HashMap::from([
                            ("resource.name", "this is stage"),
                            ("http.url", "some/[REDACTED]/token/?/abc"),
                            ("other.url", "some/guid/token/?/abc"),
                            ("custom.tag", "/foo/bar/extra"),
                        ])
                    ];
        ]
    )]
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_name() {
        let parsed_rules = replacer::parse_rules_from_string(rules);

        let root_span = new_test_span_with_tags(input);
        let child_span = new_test_span_with_tags(input);
        let mut trace = [root_span, child_span];

        replacer::replace_trace_tags(&mut trace, &parsed_rules.unwrap());

        for (key, val) in expected {
            match key {
                "resource.name" => {
                    assert_eq!(val, trace[0].resource);
                    assert_eq!(val, trace[1].resource);
                }
                _ => {
                    assert_eq!(val, trace[0].meta.get(key).unwrap());
                    assert_eq!(val, trace[1].meta.get(key).unwrap());
                }
            }
        }
    }

    /// A well-formed rule whose pattern is not a valid regex must be rejected by the regex
    /// compilation step (not by the JSON deserialization step).
    #[test]
    fn test_parse_rules_invalid_regex() {
        let result = replacer::parse_rules_from_string(
            r#"[{"name": "http.url", "pattern": ")", "repl": "${1}?"}]"#,
        );
        let message = result.err().map(|err| err.to_string()).unwrap_or_default();
        assert!(
            message.contains("Obfuscator Error: Error while parsing rule"),
            "unexpected error message: {}",
            message
        );
    }

    /// Input that is not valid JSON for a list of rules must be rejected.
    #[test]
    fn test_parse_rules_invalid_json() {
        let result = replacer::parse_rules_from_string(r#"[{"http.url", ")", "${1}?"}]"#);
        assert!(result.is_err());
    }

    /// Rules with the same name, pattern and replacement compare equal.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_replace_rule_eq() {
        let rule1 = replacer::ReplaceRule {
            name: "http.url".to_string(),
            re: regex::Regex::new("(token/)([^/]*)").unwrap(),
            repl: "${1}?".to_string(),
            no_expansion: false,
        };
        let rule2 = replacer::ReplaceRule {
            name: "http.url".to_string(),
            re: regex::Regex::new("(token/)([^/]*)").unwrap(),
            repl: "${1}?".to_string(),
            no_expansion: false,
        };
        assert_eq!(rule1, rule2);
    }

    /// Rules that differ only in their pattern compare unequal.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_replace_rule_neq() {
        let rule1 = replacer::ReplaceRule {
            name: "http.url".to_string(),
            re: regex::Regex::new("(token/)([^/]*)").unwrap(),
            repl: "${1}?".to_string(),
            no_expansion: false,
        };
        let rule2 = replacer::ReplaceRule {
            name: "http.url".to_string(),
            re: regex::Regex::new("(broken/)([^/]*)").unwrap(),
            repl: "${1}?".to_string(),
            no_expansion: false,
        };
        assert_ne!(rule1, rule2);
    }
}