Skip to main content

libdd_trace_obfuscation/
replacer.rs

1// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use libdd_common::regex_engine::{Regex, Replacer};
5use libdd_trace_protobuf::pb;
6use serde::{ser::SerializeStruct, Deserialize, Deserializer, Serialize};
7
8#[derive(Deserialize)]
9struct RawReplaceRule {
10    name: String,
11    pattern: String,
12    repl: String,
13}
14
15impl PartialEq for ReplaceRule {
16    fn eq(&self, other: &Self) -> bool {
17        self.name == other.name && self.repl == other.repl && self.re.as_str() == other.re.as_str()
18    }
19}
20
/// A compiled replace rule: which value it targets, the regex to look for, and what to put in
/// place of each match.
#[derive(Debug, Clone)]
pub struct ReplaceRule {
    /// Name of the tag that the rule addresses, with two special values handled by
    /// `replace_span_tags`:
    /// * "resource.name" targets the span resource instead of a tag
    /// * "*" targets every tag value of the span
    ///
    /// NOTE(review): this comment previously said "*" also targets the resource, but
    /// `replace_span_tags` only iterates `span.meta` for "*" — confirm which is intended.
    pub name: String,

    /// Compiled regex used for matching.
    pub re: Regex,

    /// Replacement string used wherever `re` matches.
    pub repl: String,

    /// True when `Replacer::no_expansion` reported that `repl` needs no capture-group
    /// expansion; `replace_all` then takes its fast path and pushes `repl` verbatim.
    pub no_expansion: bool,
}
38
39impl<'de> Deserialize<'de> for ReplaceRule {
40    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
41        let raw = RawReplaceRule::deserialize(deserializer)?;
42        let re = Regex::new(&raw.pattern).map_err(serde::de::Error::custom)?;
43        let no_expansion = Replacer::no_expansion(&mut raw.repl.as_str()).is_some();
44        Ok(ReplaceRule {
45            name: raw.name,
46            re,
47            repl: raw.repl,
48            no_expansion,
49        })
50    }
51}
52
53impl Serialize for ReplaceRule {
54    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
55    where
56        S: serde::Serializer,
57    {
58        let mut s = serializer.serialize_struct("ReplaceRule", 4)?;
59        s.serialize_field("name", &self.name)?;
60        s.serialize_field("re", &self.re.to_string())?;
61        s.serialize_field("repl", &self.repl)?;
62        s.serialize_field("no_expansion", &self.no_expansion)?;
63        s.end()
64    }
65}
66
67impl ReplaceRule {
68    fn apply(&self, tag_value: &mut String, scratch_space: &mut String) {
69        replace_all(
70            &self.re,
71            &self.repl,
72            self.no_expansion,
73            tag_value,
74            scratch_space,
75        )
76    }
77}
78
79/// replace_trace_tags replaces the tag values of all spans within a trace with a given set of
80/// rules.
81pub fn replace_trace_tags(trace: &mut [pb::Span], rules: &[ReplaceRule]) {
82    let mut scratch_space = String::new();
83    for span in trace.iter_mut() {
84        replace_span_tags(span, rules, &mut scratch_space);
85    }
86}
87
88/// replace_span_tags replaces the tag values of a span with a given set of rules.
89pub fn replace_span_tags(span: &mut pb::Span, rules: &[ReplaceRule], scratch_space: &mut String) {
90    for rule in rules {
91        match rule.name.as_ref() {
92            "*" => {
93                for (_, tag_value) in span.meta.iter_mut() {
94                    rule.apply(tag_value, scratch_space);
95                }
96            }
97            "resource.name" => {
98                rule.apply(&mut span.resource, scratch_space);
99            }
100            _ => {
101                if let Some(tag_value) = span.meta.get_mut(&rule.name) {
102                    rule.apply(tag_value, scratch_space);
103                }
104            }
105        }
106    }
107}
108
109/// parse_rules_from_string takes an array of rules, represented as an array of length 3 arrays
110/// holding the tag name, regex pattern, and replacement string as strings.
111/// * returns a vec of ReplaceRules
112pub fn parse_rules_from_string(
113    // rules: &'a [[&'a str; 3]],
114    rules: &str,
115) -> anyhow::Result<Vec<ReplaceRule>> {
116    let raw_rules = serde_json::from_str::<Vec<RawReplaceRule>>(rules)?;
117
118    let mut vec: Vec<ReplaceRule> = Vec::with_capacity(rules.len());
119
120    // for [name, pattern, repl] in rules {
121    for raw_rule in raw_rules {
122        let compiled_regex = match Regex::new(&raw_rule.pattern) {
123            Ok(res) => res,
124            Err(err) => {
125                anyhow::bail!("Obfuscator Error: Error while parsing rule: {}", err)
126            }
127        };
128        let no_expansion = Replacer::no_expansion(&mut &raw_rule.repl).is_some();
129        vec.push(ReplaceRule {
130            name: raw_rule.name,
131            re: compiled_regex,
132            repl: raw_rule.repl,
133            no_expansion,
134        });
135    }
136    Ok(vec)
137}
138
139/// Mutate the haystack by changing all occurences of the regex by the `replace` parameter
140/// using the scratch space provided
141///
142/// Taken from regex::replacen to use a reusable scratch space instead of allocating a new String
143/// https://docs.rs/regex/1.10.2/src/regex/regex/string.rs.html#890-944
144fn replace_all(
145    re: &Regex,
146    mut replace: &str,
147    no_expansion: bool,
148    haystack: &mut String,
149    scratch_space: &mut String,
150) {
151    // If we know that the replacement doesn't have any capture expansions,
152    // then we can use the fast path. The fast path can make a tremendous
153    // difference:
154    //
155    //   1) We use `find_iter` instead of `captures_iter`. Not asking for captures generally makes
156    //      the regex engines faster.
157    //   2) We don't need to look up all of the capture groups and do replacements inside the
158    //      replacement string. We just push it at each match and be done with it.
159    if no_expansion {
160        let mut it = re.find_iter(haystack).peekable();
161        if it.peek().is_none() {
162            return;
163        }
164        scratch_space.reserve(haystack.len());
165        let mut last_match = 0;
166        for m in it {
167            scratch_space.push_str(&haystack[last_match..m.start()]);
168            scratch_space.push_str(replace);
169            last_match = m.end();
170        }
171        scratch_space.push_str(&haystack[last_match..]);
172    } else {
173        // The slower path, which we use if the replacement may need access to
174        // capture groups.
175        let mut it = re.captures_iter(haystack).peekable();
176        if it.peek().is_none() {
177            return;
178        }
179        scratch_space.reserve(haystack.len());
180        let mut last_match = 0;
181        for cap in it {
182            // unwrap on 0 is OK because captures only reports matches
183            #[allow(clippy::unwrap_used)]
184            let m = cap.get(0).unwrap();
185            scratch_space.push_str(&haystack[last_match..m.start()]);
186            Replacer::replace_append(&mut replace, &cap, scratch_space);
187            last_match = m.end();
188        }
189        scratch_space.push_str(&haystack[last_match..]);
190    }
191    std::mem::swap(scratch_space, haystack);
192    scratch_space.truncate(0);
193}
194
195#[cfg(test)]
196mod tests {
197
198    use super::Regex;
199    use crate::replacer;
200    use duplicate::duplicate_item;
201    use libdd_trace_protobuf::pb;
202    use std::collections::HashMap;
203
204    fn new_test_span_with_tags(tags: HashMap<&str, &str>) -> pb::Span {
205        let mut span = pb::Span {
206            duration: 10000000,
207            error: 0,
208            resource: "GET /some/raclette".to_string(),
209            service: "django".to_string(),
210            name: "django.controller".to_string(),
211            span_id: 123,
212            start: 1448466874000000000,
213            trace_id: 424242,
214            meta: HashMap::new(),
215            metrics: HashMap::from([("cheese_weight".to_string(), 100000.0)]),
216            parent_id: 1111,
217            r#type: "http".to_string(),
218            meta_struct: HashMap::new(),
219            span_links: vec![],
220            span_events: vec![],
221        };
222        for (key, val) in tags {
223            match key {
224                "resource.name" => {
225                    span.resource = val.to_string();
226                }
227                _ => {
228                    span.meta.insert(key.to_string(), val.to_string());
229                }
230            }
231        }
232        span
233    }
234
    // Table-driven test: each bracketed group below supplies one set of substitutions
    // (`test_name`, `rules`, `input`, `expected`) for the function that follows, so each group
    // is expected to yield its own `#[test]`.
    //
    // * `test_replace_tags`: rules addressed to specific tag names only.
    // * `test_replace_tags_with_exceptions`: adds "*" and "resource.name" rules. Its expected
    //   resource is "this is stage": the "*" rule mapping "this" to "that" does not reach the
    //   resource because `replace_span_tags` only visits `span.meta` for "*". Likewise
    //   "other.url" keeps "guid" since the guid rule is addressed to "http.url" only.
    #[duplicate_item(
        [
        test_name   [test_replace_tags]
        rules       [r#"[
                        {"name": "http.url", "pattern": "(token/)([^/]*)", "repl": "${1}?"},
                        {"name": "http.url", "pattern": "guid", "repl": "[REDACTED]"},
                        {"name": "custom.tag", "pattern": "(/foo/bar/).*", "repl": "${1}extra"}
                    ]"#]
        input       [
                        HashMap::from([
                            ("http.url", "some/guid/token/abcdef/abc"),
                            ("custom.tag", "/foo/bar/foo"),
                        ])
                    ]
        expected    [
                        HashMap::from([
                            ("http.url", "some/[REDACTED]/token/?/abc"),
                            ("custom.tag", "/foo/bar/extra"),
                        ])
                    ];
        ]
        [
        test_name   [test_replace_tags_with_exceptions]
        rules       [r#"[
                        {"name": "*", "pattern": "(token/)([^/]*)", "repl": "${1}?"},
                        {"name": "*", "pattern": "this", "repl": "that"},
                        {"name": "http.url", "pattern": "guid", "repl": "[REDACTED]"},
                        {"name": "custom.tag", "pattern": "(/foo/bar/).*", "repl": "${1}extra"},
                        {"name": "resource.name", "pattern": "prod", "repl": "stage"}
                    ]"#]
        input       [
                        HashMap::from([
                            ("resource.name", "this is prod"),
                            ("http.url", "some/[REDACTED]/token/abcdef/abc"),
                            ("other.url", "some/guid/token/abcdef/abc"),
                            ("custom.tag", "/foo/bar/foo"),
                        ])
                    ]
        expected    [
                        HashMap::from([
                            ("resource.name", "this is stage"),
                            ("http.url", "some/[REDACTED]/token/?/abc"),
                            ("other.url", "some/guid/token/?/abc"),
                            ("custom.tag", "/foo/bar/extra"),
                        ])
                    ];
        ]
    )]
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_name() {
        // Parse the JSON rule list; the result is unwrapped below, so a parse failure fails
        // the test.
        let parsed_rules = replacer::parse_rules_from_string(rules);

        // Two spans built from the same tags, to check that every span of the trace is
        // rewritten.
        let root_span = new_test_span_with_tags(input);
        let child_span = new_test_span_with_tags(input);
        let mut trace = [root_span, child_span];

        replacer::replace_trace_tags(&mut trace, &parsed_rules.unwrap());

        // "resource.name" is checked against the span resource, every other key against the
        // tag of that name in `meta`.
        for (key, val) in expected {
            match key {
                "resource.name" => {
                    assert_eq!(val, trace[0].resource);
                    assert_eq!(val, trace[1].resource);
                }
                _ => {
                    assert_eq!(val, trace[0].meta.get(key).unwrap());
                    assert_eq!(val, trace[1].meta.get(key).unwrap());
                }
            }
        }
    }
307
308    #[test]
309    fn test_parse_rules_invalid_regex() {
310        let result = replacer::parse_rules_from_string(r#"[{"http.url", ")", "${1}?"}]"#);
311        assert!(result.is_err());
312    }
313
314    #[test]
315    #[cfg_attr(miri, ignore)]
316    fn test_replace_rule_eq() {
317        let rule1 = replacer::ReplaceRule {
318            name: "http.url".to_string(),
319            re: Regex::new("(token/)([^/]*)").unwrap(),
320            repl: "${1}?".to_string(),
321            no_expansion: false,
322        };
323        let rule2 = replacer::ReplaceRule {
324            name: "http.url".to_string(),
325            re: Regex::new("(token/)([^/]*)").unwrap(),
326            repl: "${1}?".to_string(),
327            no_expansion: false,
328        };
329        assert_eq!(rule1, rule2);
330    }
331
332    #[test]
333    #[cfg_attr(miri, ignore)]
334    fn test_replace_rule_neq() {
335        let rule1 = replacer::ReplaceRule {
336            name: "http.url".to_string(),
337            re: Regex::new("(token/)([^/]*)").unwrap(),
338            repl: "${1}?".to_string(),
339            no_expansion: false,
340        };
341        let rule2 = replacer::ReplaceRule {
342            name: "http.url".to_string(),
343            re: Regex::new("(broken/)([^/]*)").unwrap(),
344            repl: "${1}?".to_string(),
345            no_expansion: false,
346        };
347        assert_ne!(rule1, rule2);
348    }
349}