Skip to main content

libdd_trace_obfuscation/
json.rs

1// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashSet;
5
6use crate::json_scanner::{Op, Scanner};
7
/// Boxed callback invoked with the unquoted literal found under a transform key; the string it
/// returns is written to the output in place of that literal.
type Transformer = Box<dyn Fn(&str) -> String + Send + Sync>;

/// JSON obfuscator that rewrites every leaf value to `"?"`.
///
/// Two key sets tune that behavior:
/// * values found under a key in `keep_keys` are copied to the output verbatim;
/// * literal values found under a key in `transform_keys` are handed to the transformer
///   (e.g. SQL obfuscation) and replaced by its result instead of `"?"`.
///
/// Inputs made of several concatenated JSON documents are supported; each document is
/// obfuscated independently. When the input cannot be parsed, whatever was produced up to
/// that point is returned with `"..."` appended.
pub struct JsonObfuscator {
    /// Keys whose values are left untouched.
    keep_keys: HashSet<String>,
    /// Keys whose literal values go through `transformer`.
    transform_keys: HashSet<String>,
    /// Optional value transformer; `transform_keys` is only consulted when this is `Some`.
    transformer: Option<Transformer>,
}
22
/// Kind of JSON container currently open on the obfuscator's closure stack.
///
/// The derives keep the stack cheap to copy and make it possible to compare entries and to
/// debug-print the scanner state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClosureKind {
    /// A `[...]` container.
    Array,
    /// A `{...}` container.
    Object,
}
27
28impl JsonObfuscator {
29    pub fn new(
30        keep_keys: impl IntoIterator<Item = String>,
31        transform_keys: impl IntoIterator<Item = String>,
32        transformer: Option<Transformer>,
33    ) -> Self {
34        Self {
35            keep_keys: keep_keys.into_iter().collect(),
36            transform_keys: transform_keys.into_iter().collect(),
37            transformer,
38        }
39    }
40
41    /// Obfuscates json string and return an optional error on malformatted json
42    /// If an error occurs, an value is returned anyways which might be truncated (...)
43    pub fn obfuscate(&self, input: &str) -> (String, Option<String>) {
44        if input.is_empty() {
45            return (String::new(), None);
46        }
47
48        let mut out = String::with_capacity(input.len());
49        let mut scanner = Scanner::new();
50        let mut buf = String::new(); // accumulates key chars or transform-value chars
51        let mut closures: Vec<ClosureKind> = Vec::new();
52        let mut keep_depth: usize = 0;
53        let mut key = false;
54        let mut wiped = false;
55        let mut keeping = false;
56        let mut transforming_value = false;
57
58        for c in input.chars() {
59            let op = scanner.step(c);
60            let depth = closures.len(); // snapshot before any mutation
61
62            match op {
63                Op::BeginObject => {
64                    closures.push(ClosureKind::Object);
65                    set_key(&closures, &mut key, &mut wiped);
66                    transforming_value = false;
67                }
68                Op::BeginArray => {
69                    closures.push(ClosureKind::Array);
70                    set_key(&closures, &mut key, &mut wiped);
71                    transforming_value = false;
72                }
73                Op::EndArray | Op::EndObject => {
74                    closures.pop();
75                    set_key(&closures, &mut key, &mut wiped);
76                    handle_value_done(
77                        &mut out,
78                        &mut buf,
79                        &mut keeping,
80                        &mut transforming_value,
81                        &mut keep_depth,
82                        depth,
83                        self.transformer.as_deref(),
84                    );
85                }
86                Op::ObjectValue | Op::ArrayValue => {
87                    set_key(&closures, &mut key, &mut wiped);
88                    handle_value_done(
89                        &mut out,
90                        &mut buf,
91                        &mut keeping,
92                        &mut transforming_value,
93                        &mut keep_depth,
94                        depth,
95                        self.transformer.as_deref(),
96                    );
97                }
98                Op::BeginLiteral | Op::Continue => {
99                    if transforming_value {
100                        buf.push(c);
101                        continue;
102                    } else if key {
103                        buf.push(c);
104                    } else if !keeping {
105                        if !wiped {
106                            out.push_str("\"?\"");
107                            wiped = true;
108                        }
109                        continue;
110                    }
111                }
112                Op::ObjectKey => {
113                    let k = buf.trim_matches('"');
114                    if !keeping && self.keep_keys.contains(k) {
115                        keeping = true;
116                        keep_depth = depth + 1;
117                    } else if !transforming_value
118                        && self.transformer.is_some()
119                        && self.transform_keys.contains(k)
120                    {
121                        transforming_value = true;
122                    }
123                    buf.clear();
124                    key = false;
125                }
126                Op::SkipSpace => continue,
127                Op::Error => {
128                    out.push_str("...");
129                    return (out, scanner.err);
130                }
131                Op::End => {} // whitespace between JSON objects โ€” fall through to output char
132            }
133
134            out.push(c);
135        }
136
137        if scanner.eof() == Op::Error {
138            out.push_str("...");
139        }
140        (out, scanner.err)
141    }
142}
143
144/// Updates `key` and `wiped` based on the current closure stack.
145/// `key` is true at top level or when inside an object (not an array).
146fn set_key(closures: &[ClosureKind], key: &mut bool, wiped: &mut bool) {
147    let n = closures.len();
148    *key = n == 0 || matches!(closures[n - 1], ClosureKind::Object);
149    *wiped = false;
150}
151
152/// Handles the "value is done" logic after a value-ending opcode.
153/// Writes the transformer result if applicable, or stops keeping if depth shrinks.
154fn handle_value_done(
155    out: &mut String,
156    buf: &mut String,
157    keeping: &mut bool,
158    transforming_value: &mut bool,
159    keep_depth: &mut usize,
160    depth: usize,
161    transformer: Option<&(dyn Fn(&str) -> String + Send + Sync)>,
162) {
163    if *transforming_value {
164        if let Some(t) = transformer {
165            // Unquote the collected JSON string literal (handles escape sequences).
166            let raw: String =
167                serde_json::from_str(buf).unwrap_or_else(|_| buf.trim_matches('"').to_string());
168            let result = t(&raw);
169            out.push('"');
170            out.push_str(&result);
171            out.push('"');
172            *transforming_value = false;
173            buf.clear();
174        }
175    } else if *keeping && depth < *keep_depth {
176        *keeping = false;
177    }
178}
179
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;
    use serde_json::json;

    use super::JsonObfuscator;
    use crate::sql::obfuscate_sql_string;

    /// Obfuscator with the given keep keys and no transformer.
    fn obf(keep_keys: &[&str]) -> JsonObfuscator {
        JsonObfuscator::new(keep_keys.iter().map(|s| s.to_string()), [], None)
    }

    /// Obfuscator with the given keep/transform keys and SQL obfuscation as the transformer.
    fn obf_sql(keep_keys: &[&str], transform_keys: &[&str]) -> JsonObfuscator {
        JsonObfuscator::new(
            keep_keys.iter().map(|s| s.to_string()),
            transform_keys.iter().map(|s| s.to_string()),
            Some(Box::new(obfuscate_sql_string)),
        )
    }

    /// Asserts that both strings parse as JSON and are structurally equal.
    fn assert_json_eq(result: &str, expected: &str) {
        let result: serde_json::Value =
            serde_json::from_str(result).expect("result is not valid JSON");
        let expected: serde_json::Value =
            serde_json::from_str(expected).expect("expected is not valid JSON");
        assert_eq!(result, expected);
    }

    // Basic obfuscation tests — parametric over (keep_keys, input, expected).
    // Uses assert_json_eq (structural comparison, whitespace-insensitive).
    #[duplicate_item(
        test_name                         keep_keys           input                                                                                                                          expected;
        [test_empty_object]               [&[]]               ["{}"]                                                                                                                         ["{}"];
        [test_empty_array]                [&[]]               ["[]"]                                                                                                                         ["[]"];
        [test_emoji_object]               [&["🐵"]]           [r#"{"🐵":"🙊"}"#]                                                                                                             [r#"{"🐵":"🙊"}"#];
        [test_nested_empty_objects]       [&[]]               [r#"{"a":{},"b":{"c":{}}}"#]                                                                                                  [r#"{"a":{},"b":{"c":{}}}"#];
        [test_boolean_and_null_obfuscated][&[]]               [r#"{"a":true,"b":false,"c":null}"#]                                                                                          [r#"{"a":"?","b":"?","c":"?"}"#];
        [test_all_values_obfuscated]      [&[]]               [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#]           [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["?","?",{"k":"?"}]},"?"]}}}"#];
        [test_numbers_obfuscated]         [&[]]               [r#"{"highlight":{"pre_tags":["<em>"],"post_tags":["</em>"],"index":1}}"#]                                                    [r#"{"highlight":{"pre_tags":["?"],"post_tags":["?"],"index":"?"}}"#];
        [test_keep_key_keeps_entire_value][&["other"]]        [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#]           [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["1","2",{"k":"v"}]},"?"]}}}"#];
        [test_keep_key_nested_array_fully_kept][&["fields"]]  [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#]                                                    [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#];
        [test_keep_key_deep_nested]       [&["k"]]            [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#]                                                    [r#"{"fields":["?",{"key":"?","other":["?","?",{"k":"v"}]},"?"]}"#];
        [test_keep_key_in_nested_object]  [&["C"]]            [r#"{"fields":[{"A":1,"B":{"C":3}},"2"]}"#]                                                                                   [r#"{"fields":[{"A":"?","B":{"C":3}},"?"]}"#];
        [test_keep_key_large_nested_structure][&["hits"]]     [r#"{"outer":{"total":2,"max_score":0.9105287,"hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#]                      [r#"{"outer":{"total":"?","max_score":"?","hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#];
        [test_keep_multiple_keys]         [&["_index","title"]][r#"{"hits":[{"_index":"bookdb_index","_type":"book","_score":0.9,"_source":{"summary":"text","title":"ES in Action","publish_date":"2015-12-03"},"highlight":{"title":["ES Action"]}}]}"#] [r#"{"hits":[{"_index":"bookdb_index","_type":"?","_score":"?","_source":{"summary":"?","title":"ES in Action","publish_date":"?"},"highlight":{"title":["ES Action"]}}]}"#];
        [test_keep_key_wallet]            [&["company_wallet_configuration_id"]] [r#"{"email":"dev@datadoghq.com","company_wallet_configuration_id":1}"#] [r#"{"email":"?","company_wallet_configuration_id":1}"#];
    )]
    #[test]
    fn test_name() {
        let (res, err) = obf(keep_keys).obfuscate(input);
        assert_eq!(err, None);
        assert_json_eq(&res, expected);
    }

    // Truncation / error tests — parametric over (input, expected_exact_string, expected_error).
    #[duplicate_item(
        test_name                           input                                                                    expected                                          expected_error;
        [test_empty_input]                  [""]                                                                     [""]                                              [None];
        [test_invalid_emoji]                ["🤨"]                                                                   ["..."]                                           [Some("invalid character '🤨' looking for beginning of value".to_owned())];
        [test_invalid_unicode]              ["ჸ"]                                                                    ["..."]                                           [Some("invalid character 'ჸ' looking for beginning of value".to_owned())];
        [test_invalid_json_appends_ellipsis]["INVALID"]                                                              ["..."]                                           [Some("invalid character 'I' looking for beginning of value".to_owned())];
        [test_invalid_single_char]          [")"]                                                                    ["..."]                                           [Some("invalid character ')' looking for beginning of value".to_owned())];
        [test_truncated_open_value_string]  [r#"{"query":""#]                                                        [r#"{"query":"?"..."#]                            [Some("unexpected end of JSON input at char position 11".to_owned())];
        [test_truncated_multi_json]         [r#"{"first json": "valid"} {"second json": "unfinished"#]               [r#"{"first json":"?"} {"second json":"?"..."#]   [Some("unexpected end of JSON input at char position 53".to_owned())];
    )]
    #[test]
    fn test_name() {
        let (res, err) = obf(&[]).obfuscate(input);
        assert_eq!(res, expected);
        assert_eq!(err, expected_error);
    }

    #[test]
    fn test_multiple_json_objects() {
        // Multiple concatenated JSON objects (elasticsearch bulk API pattern).
        let input = r#"{"index":{"_index":"traces","_type":"trace"}} {"value":1,"name":"test"}"#;
        let (result, err) = obf(&[]).obfuscate(input);
        assert_eq!(err, None);
        // The output is a stream of documents, so it is read back one value at a time.
        let mut stream =
            serde_json::Deserializer::from_str(&result).into_iter::<serde_json::Value>();
        let first = stream
            .next()
            .expect("first value")
            .expect("first value is valid JSON");
        let second = stream
            .next()
            .expect("second value")
            .expect("second value is valid JSON");
        assert_eq!(first, json!({"index":{"_index":"?","_type":"?"}}));
        assert_eq!(second, json!({"value":"?","name":"?"}));
    }

    #[test]
    fn test_transform_key_sql_basic() {
        let input = r#"{"query":"select * from table where id = 2","hello":"world","hi":"there"}"#;
        let (result, err) = obf_sql(&["hello"], &["query"]).obfuscate(input);
        assert_eq!(err, None);

        let val: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(val["hello"], json!("world"));
        assert_eq!(val["hi"], json!("?"));
        assert!(
            val["query"].as_str().unwrap().contains('?'),
            "SQL value should be obfuscated"
        );
    }

    #[test]
    fn test_transform_key_with_object_value_falls_through() {
        // A transform key whose value is an object is obfuscated like any other object.
        let input = r#"{"object":{"not a":"query"}}"#;
        let expected = r#"{"object":{"not a":"?"}}"#;
        let (res, err) = obf_sql(&[], &["object"]).obfuscate(input);
        assert_eq!(err, None);

        assert_json_eq(&res, expected);
    }

    #[test]
    fn test_transform_key_with_array_value_falls_through() {
        // A transform key whose value is an array is obfuscated like any other array.
        let input = r#"{"object":["not","a","query"]}"#;
        let expected = r#"{"object":["?","?","?"]}"#;
        let (res, err) = obf_sql(&[], &["object"]).obfuscate(input);
        assert_eq!(err, None);

        assert_json_eq(&res, expected);
    }
}