Skip to main content

libdd_trace_obfuscation/
replacer.rs

1// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use libdd_common::regex_engine::{Regex, Replacer};
5use libdd_trace_protobuf::pb;
6use serde::{ser::SerializeStruct, Deserialize, Deserializer, Serialize};
7
/// Uncompiled form of a replace rule as it is read from serialized input: every field is a
/// plain string and `pattern` has not been turned into a `Regex` yet.
#[derive(Deserialize)]
struct RawReplaceRule {
    // Tag name the rule targets; copied into `ReplaceRule::name`.
    name: String,
    // Regex source text; compiled with `Regex::new` when a `ReplaceRule` is built.
    pattern: String,
    // Replacement text; copied into `ReplaceRule::repl`.
    repl: String,
}
14
15impl PartialEq for ReplaceRule {
16    fn eq(&self, other: &Self) -> bool {
17        self.name == other.name && self.repl == other.repl && self.re.as_str() == other.re.as_str()
18    }
19}
20
/// A compiled replace rule: which value it targets, the regex to search for, and the text to
/// substitute for every match.
#[derive(Debug, Clone)]
pub struct ReplaceRule {
    /// name specifies the name of the tag that the replace rule addresses. However,
    /// some exceptions apply such as:
    /// * "resource.name" will target the resource
    /// * "*" will target all tags
    ///
    /// NOTE(review): this comment used to say that "*" also targets the resource, but
    /// `replace_span_tags` in this file only rewrites the `meta` values for "*" — confirm which
    /// behavior is intended.
    pub name: String,

    /// re holds the regex pattern for matching.
    pub re: Regex,

    /// repl specifies the replacement string to be used when Pattern matches.
    pub repl: String,

    /// `true` when `repl` needs no capture-group expansion (`Replacer::no_expansion` returned
    /// `Some`), so `replace_all` can insert it verbatim through its fast path. `false` means
    /// the replacement may reference capture groups and must be expanded for each match.
    pub no_expansion: bool,
}
38
39impl<'de> Deserialize<'de> for ReplaceRule {
40    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
41        let raw = RawReplaceRule::deserialize(deserializer)?;
42        let re = Regex::new(&raw.pattern).map_err(serde::de::Error::custom)?;
43        let no_expansion = Replacer::no_expansion(&mut raw.repl.as_str()).is_some();
44        Ok(Self {
45            name: raw.name,
46            re,
47            repl: raw.repl,
48            no_expansion,
49        })
50    }
51}
52
53impl Serialize for ReplaceRule {
54    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
55    where
56        S: serde::Serializer,
57    {
58        let mut s = serializer.serialize_struct("ReplaceRule", 4)?;
59        s.serialize_field("name", &self.name)?;
60        s.serialize_field("re", &self.re.to_string())?;
61        s.serialize_field("repl", &self.repl)?;
62        s.serialize_field("no_expansion", &self.no_expansion)?;
63        s.end()
64    }
65}
66
67impl ReplaceRule {
68    fn apply(&self, tag_value: &mut String, scratch_space: &mut String) {
69        replace_all(
70            &self.re,
71            &self.repl,
72            self.no_expansion,
73            tag_value,
74            scratch_space,
75        );
76    }
77}
78
79/// `replace_trace_tags` replaces the tag values of all spans within a trace with a given set of
80/// rules.
81pub fn replace_trace_tags(trace: &mut [pb::Span], rules: &[ReplaceRule]) {
82    let mut scratch_space = String::new();
83    for span in trace.iter_mut() {
84        replace_span_tags(span, rules, &mut scratch_space);
85    }
86}
87
88/// `replace_span_tags` replaces the tag values of a span with a given set of rules.
89pub fn replace_span_tags(span: &mut pb::Span, rules: &[ReplaceRule], scratch_space: &mut String) {
90    for rule in rules {
91        match rule.name.as_ref() {
92            "*" => {
93                for tag_value in span.meta.values_mut() {
94                    rule.apply(tag_value, scratch_space);
95                }
96            }
97            "resource.name" => {
98                rule.apply(&mut span.resource, scratch_space);
99            }
100            _ => {
101                if let Some(tag_value) = span.meta.get_mut(&rule.name) {
102                    rule.apply(tag_value, scratch_space);
103                }
104            }
105        }
106    }
107}
108
109/// `parse_rules_from_string` takes an array of rules, represented as an array of length 3 arrays
110/// holding the tag name, regex pattern, and replacement string as strings.
111/// * returns a vec of `ReplaceRules`
112///
113/// # Errors
114///
115/// Returns an error when the input is not valid JSON or a rule pattern is not a valid regex.
116pub fn parse_rules_from_string(
117    // rules: &'a [[&'a str; 3]],
118    rules: &str,
119) -> anyhow::Result<Vec<ReplaceRule>> {
120    let raw_rules = serde_json::from_str::<Vec<RawReplaceRule>>(rules)?;
121
122    let mut vec: Vec<ReplaceRule> = Vec::with_capacity(rules.len());
123
124    // for [name, pattern, repl] in rules {
125    for raw_rule in raw_rules {
126        let compiled_regex = match Regex::new(&raw_rule.pattern) {
127            Ok(res) => res,
128            Err(err) => {
129                anyhow::bail!("Obfuscator Error: Error while parsing rule: {}", err)
130            }
131        };
132        let no_expansion = Replacer::no_expansion(&mut &raw_rule.repl).is_some();
133        vec.push(ReplaceRule {
134            name: raw_rule.name,
135            re: compiled_regex,
136            repl: raw_rule.repl,
137            no_expansion,
138        });
139    }
140    Ok(vec)
141}
142
143/// Mutate the haystack by changing all occurences of the regex by the `replace` parameter
144/// using the scratch space provided
145///
146/// Taken from `regex::replacen` to use a reusable scratch space instead of allocating a new String
147/// <https://docs.rs/regex/1.10.2/src/regex/regex/string.rs.html#890-944>
148fn replace_all(
149    re: &Regex,
150    mut replace: &str,
151    no_expansion: bool,
152    haystack: &mut String,
153    scratch_space: &mut String,
154) {
155    // If we know that the replacement doesn't have any capture expansions,
156    // then we can use the fast path. The fast path can make a tremendous
157    // difference:
158    //
159    //   1) We use `find_iter` instead of `captures_iter`. Not asking for captures generally makes
160    //      the regex engines faster.
161    //   2) We don't need to look up all of the capture groups and do replacements inside the
162    //      replacement string. We just push it at each match and be done with it.
163    if no_expansion {
164        let mut it = re.find_iter(haystack).peekable();
165        if it.peek().is_none() {
166            return;
167        }
168        scratch_space.reserve(haystack.len());
169        let mut last_match = 0;
170        for m in it {
171            scratch_space.push_str(&haystack[last_match..m.start()]);
172            scratch_space.push_str(replace);
173            last_match = m.end();
174        }
175        scratch_space.push_str(&haystack[last_match..]);
176    } else {
177        // The slower path, which we use if the replacement may need access to
178        // capture groups.
179        let mut it = re.captures_iter(haystack).peekable();
180        if it.peek().is_none() {
181            return;
182        }
183        scratch_space.reserve(haystack.len());
184        let mut last_match = 0;
185        for cap in it {
186            // unwrap on 0 is OK because captures only reports matches
187            #[allow(clippy::unwrap_used)]
188            let m = cap.get(0).unwrap();
189            scratch_space.push_str(&haystack[last_match..m.start()]);
190            Replacer::replace_append(&mut replace, &cap, scratch_space);
191            last_match = m.end();
192        }
193        scratch_space.push_str(&haystack[last_match..]);
194    }
195    std::mem::swap(scratch_space, haystack);
196    scratch_space.truncate(0);
197}
198
199#[cfg(test)]
200mod tests {
201
202    use super::Regex;
203    use crate::replacer;
204    use duplicate::duplicate_item;
205    use libdd_trace_protobuf::pb;
206    use std::collections::HashMap;
207
208    fn new_test_span_with_tags(tags: HashMap<&str, &str>) -> pb::Span {
209        let mut span = pb::Span {
210            duration: 10000000,
211            error: 0,
212            resource: "GET /some/raclette".to_string(),
213            service: "django".to_string(),
214            name: "django.controller".to_string(),
215            span_id: 123,
216            start: 1448466874000000000,
217            trace_id: 424242,
218            meta: HashMap::new(),
219            metrics: HashMap::from([("cheese_weight".to_string(), 100000.0)]),
220            parent_id: 1111,
221            r#type: "http".to_string(),
222            meta_struct: HashMap::new(),
223            span_links: vec![],
224            span_events: vec![],
225        };
226        for (key, val) in tags {
227            match key {
228                "resource.name" => {
229                    span.resource = val.to_string();
230                }
231                _ => {
232                    span.meta.insert(key.to_string(), val.to_string());
233                }
234            }
235        }
236        span
237    }
238
    // Table-driven replacement tests: `duplicate_item` stamps out one `#[test]` function per
    // bracketed group below, substituting `test_name`, `rules`, `input` and `expected` into the
    // shared body.
    //
    // * `test_replace_tags`: rules addressed to specific tags, with and without capture-group
    //   references in the replacement.
    // * `test_replace_tags_with_exceptions`: adds "*" and "resource.name" rules. Note that the
    //   expected resource still starts with "this" even though a "*" rule rewrites "this" to
    //   "that": "*" rules are not applied to the resource.
    #[duplicate_item(
        [
        test_name   [test_replace_tags]
        rules       [r#"[
                        {"name": "http.url", "pattern": "(token/)([^/]*)", "repl": "${1}?"},
                        {"name": "http.url", "pattern": "guid", "repl": "[REDACTED]"},
                        {"name": "custom.tag", "pattern": "(/foo/bar/).*", "repl": "${1}extra"}
                    ]"#]
        input       [
                        HashMap::from([
                            ("http.url", "some/guid/token/abcdef/abc"),
                            ("custom.tag", "/foo/bar/foo"),
                        ])
                    ]
        expected    [
                        HashMap::from([
                            ("http.url", "some/[REDACTED]/token/?/abc"),
                            ("custom.tag", "/foo/bar/extra"),
                        ])
                    ];
        ]
        [
        test_name   [test_replace_tags_with_exceptions]
        rules       [r#"[
                        {"name": "*", "pattern": "(token/)([^/]*)", "repl": "${1}?"},
                        {"name": "*", "pattern": "this", "repl": "that"},
                        {"name": "http.url", "pattern": "guid", "repl": "[REDACTED]"},
                        {"name": "custom.tag", "pattern": "(/foo/bar/).*", "repl": "${1}extra"},
                        {"name": "resource.name", "pattern": "prod", "repl": "stage"}
                    ]"#]
        input       [
                        HashMap::from([
                            ("resource.name", "this is prod"),
                            ("http.url", "some/[REDACTED]/token/abcdef/abc"),
                            ("other.url", "some/guid/token/abcdef/abc"),
                            ("custom.tag", "/foo/bar/foo"),
                        ])
                    ]
        expected    [
                        HashMap::from([
                            ("resource.name", "this is stage"),
                            ("http.url", "some/[REDACTED]/token/?/abc"),
                            ("other.url", "some/guid/token/?/abc"),
                            ("custom.tag", "/foo/bar/extra"),
                        ])
                    ];
        ]
    )]
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_name() {
        let parsed_rules = replacer::parse_rules_from_string(rules);

        // Two identical spans, so the shared scratch buffer is exercised across spans.
        let root_span = new_test_span_with_tags(input);
        let child_span = new_test_span_with_tags(input);
        let mut trace = [root_span, child_span];

        replacer::replace_trace_tags(&mut trace, &parsed_rules.unwrap());

        // "resource.name" is checked against the span's resource; every other key is looked up
        // in `meta`.
        for (key, val) in expected {
            if key == "resource.name" {
                assert_eq!(val, trace[0].resource);
                assert_eq!(val, trace[1].resource);
            } else {
                assert_eq!(val, trace[0].meta.get(key).unwrap());
                assert_eq!(val, trace[1].meta.get(key).unwrap());
            }
        }
    }
308
309    #[test]
310    fn test_parse_rules_invalid_regex() {
311        let result = replacer::parse_rules_from_string(r#"[{"http.url", ")", "${1}?"}]"#);
312        assert!(result.is_err());
313    }
314
315    #[test]
316    #[cfg_attr(miri, ignore)]
317    fn test_replace_rule_eq() {
318        let rule1 = replacer::ReplaceRule {
319            name: "http.url".to_string(),
320            re: Regex::new("(token/)([^/]*)").unwrap(),
321            repl: "${1}?".to_string(),
322            no_expansion: false,
323        };
324        let rule2 = replacer::ReplaceRule {
325            name: "http.url".to_string(),
326            re: Regex::new("(token/)([^/]*)").unwrap(),
327            repl: "${1}?".to_string(),
328            no_expansion: false,
329        };
330        assert_eq!(rule1, rule2);
331    }
332
333    #[test]
334    #[cfg_attr(miri, ignore)]
335    fn test_replace_rule_neq() {
336        let rule1 = replacer::ReplaceRule {
337            name: "http.url".to_string(),
338            re: Regex::new("(token/)([^/]*)").unwrap(),
339            repl: "${1}?".to_string(),
340            no_expansion: false,
341        };
342        let rule2 = replacer::ReplaceRule {
343            name: "http.url".to_string(),
344            re: Regex::new("(broken/)([^/]*)").unwrap(),
345            repl: "${1}?".to_string(),
346            no_expansion: false,
347        };
348        assert_ne!(rule1, rule2);
349    }
350}