Skip to main content

libdd_trace_obfuscation/json/
mod.rs

1// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::obfuscation_config::{JsonObfuscatorConfig, JsonStringTransformer};
5mod scanner;
6use scanner::{Op, Scanner};
7
/// Obfuscates a JSON string by replacing all leaf values with `"?"`, unless the value belongs to a
/// key listed in `keep_keys`, in which case it is left verbatim.
///
/// Keys in `transform_keys` have their string values passed through a transformer function
/// (e.g. SQL obfuscation) instead of being replaced with `"?"`. A transform key is only honoured
/// when a transformer is configured, and when its value turns out to be an object or an array
/// nothing is transformed: the leaves of that container are obfuscated like any other value.
///
/// Multiple concatenated JSON objects in the input are each obfuscated independently.
/// On a parse error the output so far is returned with `"..."` appended.
pub struct JsonObfuscator {
    // Supplies `keep_keys`, `transform_keys` and the optional `transformer` read by `obfuscate`.
    config: JsonObfuscatorConfig,
}
19
/// Kind of JSON container that is currently open while scanning.
///
/// The obfuscator keeps a stack of these (innermost last) so it can tell whether the next
/// literal it meets can be an object key or is an array element.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClosureKind {
    /// Pushed when the scanner reports the beginning of an array.
    Array,
    /// Pushed when the scanner reports the beginning of an object.
    Object,
}
24
25impl JsonObfuscator {
26    #[must_use]
27    pub const fn new(config: JsonObfuscatorConfig) -> Self {
28        Self { config }
29    }
30
31    /// Obfuscates json string and return an optional error on malformatted json
32    /// If an error occurs, an value is returned anyways which might be truncated (...)
33    #[must_use]
34    pub fn obfuscate(&self, input: &str) -> (String, Option<String>) {
35        if input.is_empty() {
36            return (String::new(), None);
37        }
38
39        let mut out = String::with_capacity(input.len());
40        let mut scanner = Scanner::new();
41        let mut buf = String::new(); // accumulates key chars or transform-value chars
42        let mut closures: Vec<ClosureKind> = Vec::new();
43        let mut keep_depth: usize = 0;
44        let mut key = false;
45        let mut wiped = false;
46        let mut keeping = false;
47        let mut transforming_value = false;
48
49        for c in input.chars() {
50            let op = scanner.step(c);
51            let depth = closures.len(); // snapshot before any mutation
52
53            match op {
54                Op::BeginObject => {
55                    closures.push(ClosureKind::Object);
56                    set_key(&closures, &mut key, &mut wiped);
57                    transforming_value = false;
58                }
59                Op::BeginArray => {
60                    closures.push(ClosureKind::Array);
61                    set_key(&closures, &mut key, &mut wiped);
62                    transforming_value = false;
63                }
64                Op::EndArray | Op::EndObject => {
65                    closures.pop();
66                    set_key(&closures, &mut key, &mut wiped);
67                    handle_value_done(
68                        &mut out,
69                        &mut buf,
70                        &mut keeping,
71                        &mut transforming_value,
72                        keep_depth,
73                        depth,
74                        self.config.transformer.as_ref(),
75                    );
76                }
77                Op::ObjectValue | Op::ArrayValue => {
78                    set_key(&closures, &mut key, &mut wiped);
79                    handle_value_done(
80                        &mut out,
81                        &mut buf,
82                        &mut keeping,
83                        &mut transforming_value,
84                        keep_depth,
85                        depth,
86                        self.config.transformer.as_ref(),
87                    );
88                }
89                Op::BeginLiteral | Op::Continue => {
90                    if transforming_value {
91                        buf.push(c);
92                        continue;
93                    } else if key {
94                        buf.push(c);
95                    } else if !keeping {
96                        if !wiped {
97                            out.push_str("\"?\"");
98                            wiped = true;
99                        }
100                        continue;
101                    }
102                }
103                Op::ObjectKey => {
104                    let k = buf.trim_matches('"');
105                    if !keeping && self.config.keep_keys.contains(k) {
106                        keeping = true;
107                        keep_depth = depth + 1;
108                    } else if !transforming_value
109                        && self.config.transformer.is_some()
110                        && self.config.transform_keys.contains(k)
111                    {
112                        transforming_value = true;
113                    }
114                    buf.clear();
115                    key = false;
116                }
117                Op::SkipSpace => continue,
118                Op::Error => {
119                    out.push_str("...");
120                    return (out, scanner.err);
121                }
122                Op::End => {} // whitespace between JSON objects โ€” fall through to output char
123            }
124
125            out.push(c);
126        }
127
128        if scanner.eof() == Op::Error {
129            out.push_str("...");
130        }
131        (out, scanner.err)
132    }
133}
134
135/// Updates `key` and `wiped` based on the current closure stack.
136/// `key` is true at top level or when inside an object (not an array).
137fn set_key(closures: &[ClosureKind], key: &mut bool, wiped: &mut bool) {
138    let n = closures.len();
139    *key = n == 0 || matches!(closures[n - 1], ClosureKind::Object);
140    *wiped = false;
141}
142
143/// Handles the "value is done" logic after a value-ending opcode.
144/// Writes the transformer result if applicable, or stops keeping if depth shrinks.
145fn handle_value_done(
146    out: &mut String,
147    buf: &mut String,
148    keeping: &mut bool,
149    transforming_value: &mut bool,
150    keep_depth: usize,
151    depth: usize,
152    transformer: Option<&JsonStringTransformer>,
153) {
154    if *transforming_value {
155        if let Some(t) = transformer {
156            // Unquote the collected JSON string literal (handles escape sequences).
157            let raw: String =
158                serde_json::from_str(buf).unwrap_or_else(|_| buf.trim_matches('"').to_string());
159            let result = t(&raw);
160            out.push('"');
161            out.push_str(&result);
162            out.push('"');
163            *transforming_value = false;
164            buf.clear();
165        }
166    } else if *keeping && depth < keep_depth {
167        *keeping = false;
168    }
169}
170
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;
    use serde_json::json;

    use super::JsonObfuscator;
    use crate::{obfuscation_config::JsonObfuscatorConfig, sql::obfuscate_sql_string};

    /// Builds an enabled obfuscator with the given keep keys and default settings otherwise.
    fn obf(keep_keys: &[&str]) -> JsonObfuscator {
        JsonObfuscator::new(JsonObfuscatorConfig {
            enabled: true,
            keep_keys: keep_keys
                .iter()
                .map(std::string::ToString::to_string)
                .collect(),
            ..Default::default()
        })
    }

    /// Builds an enabled obfuscator that runs SQL obfuscation on the values of `transform_keys`.
    fn obf_sql(keep_keys: &[&str], transform_keys: &[&str]) -> JsonObfuscator {
        JsonObfuscator::new(JsonObfuscatorConfig {
            enabled: true,
            keep_keys: keep_keys
                .iter()
                .map(std::string::ToString::to_string)
                .collect(),
            transform_keys: transform_keys
                .iter()
                .map(std::string::ToString::to_string)
                .collect(),
            transformer: Some(obfuscate_sql_string),
        })
    }

    /// Asserts that both strings are valid JSON and structurally equal.
    fn assert_json_eq(result: &str, expected: &str) {
        let result: serde_json::Value =
            serde_json::from_str(result).expect("result is not valid JSON");
        let expected: serde_json::Value =
            serde_json::from_str(expected).expect("expected is not valid JSON");
        assert_eq!(result, expected);
    }

    // Basic obfuscation tests — parametric over (keep_keys, input, expected).
    // Uses assert_json_eq (structural comparison, whitespace-insensitive).
    #[duplicate_item(
        test_name                         keep_keys           input                                                                                                                          expected;
        [test_empty_object]               [&[]]               ["{}"]                                                                                                                         ["{}"];
        [test_empty_array]                [&[]]               ["[]"]                                                                                                                         ["[]"];
        [test_emoji_object]               [&["🐵"]]           [r#"{"🐵":"🙊"}"#]                                                                                                             [r#"{"🐵":"🙊"}"#];
        [test_nested_empty_objects]       [&[]]               [r#"{"a":{},"b":{"c":{}}}"#]                                                                                                  [r#"{"a":{},"b":{"c":{}}}"#];
        [test_boolean_and_null_obfuscated][&[]]               [r#"{"a":true,"b":false,"c":null}"#]                                                                                          [r#"{"a":"?","b":"?","c":"?"}"#];
        [test_all_values_obfuscated]      [&[]]               [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#]           [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["?","?",{"k":"?"}]},"?"]}}}"#];
        [test_numbers_obfuscated]         [&[]]               [r#"{"highlight":{"pre_tags":["<em>"],"post_tags":["</em>"],"index":1}}"#]                                                    [r#"{"highlight":{"pre_tags":["?"],"post_tags":["?"],"index":"?"}}"#];
        [test_keep_key_keeps_entire_value][&["other"]]        [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#]           [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["1","2",{"k":"v"}]},"?"]}}}"#];
        [test_keep_key_nested_array_fully_kept][&["fields"]]  [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#]                                                    [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#];
        [test_keep_key_deep_nested]       [&["k"]]            [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#]                                                    [r#"{"fields":["?",{"key":"?","other":["?","?",{"k":"v"}]},"?"]}"#];
        [test_keep_key_in_nested_object]  [&["C"]]            [r#"{"fields":[{"A":1,"B":{"C":3}},"2"]}"#]                                                                                   [r#"{"fields":[{"A":"?","B":{"C":3}},"?"]}"#];
        [test_keep_key_large_nested_structure][&["hits"]]     [r#"{"outer":{"total":2,"max_score":0.9105287,"hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#]                      [r#"{"outer":{"total":"?","max_score":"?","hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#];
        [test_keep_multiple_keys]         [&["_index","title"]][r#"{"hits":[{"_index":"bookdb_index","_type":"book","_score":0.9,"_source":{"summary":"text","title":"ES in Action","publish_date":"2015-12-03"},"highlight":{"title":["ES Action"]}}]}"#] [r#"{"hits":[{"_index":"bookdb_index","_type":"?","_score":"?","_source":{"summary":"?","title":"ES in Action","publish_date":"?"},"highlight":{"title":["ES Action"]}}]}"#];
        [test_keep_key_wallet]            [&["company_wallet_configuration_id"]] [r#"{"email":"dev@datadoghq.com","company_wallet_configuration_id":1}"#] [r#"{"email":"?","company_wallet_configuration_id":1}"#];
    )]
    #[test]
    fn test_name() {
        let (res, err) = obf(keep_keys).obfuscate(input);
        assert_eq!(err, None);
        assert_json_eq(&res, expected);
    }

    // Truncation / error tests — parametric over (input, expected_exact_string, expected_error).
    #[duplicate_item(
        test_name                           input                                                                    expected                                          expected_error;
        [test_empty_input]                  [""]                                                                     [""]                                              [None];
        [test_invalid_emoji]                ["🤨"]                                                                   ["..."]                                           [Some("invalid character '🤨' looking for beginning of value".to_owned())];
        [test_invalid_unicode]              ["ჸ"]                                                                    ["..."]                                           [Some("invalid character 'ჸ' looking for beginning of value".to_owned())];
        [test_invalid_json_appends_ellipsis]["INVALID"]                                                              ["..."]                                           [Some("invalid character 'I' looking for beginning of value".to_owned())];
        [test_invalid_single_char]          [")"]                                                                    ["..."]                                           [Some("invalid character ')' looking for beginning of value".to_owned())];
        [test_truncated_open_value_string]  [r#"{"query":""#]                                                        [r#"{"query":"?"..."#]                            [Some("unexpected end of JSON input at char position 11".to_owned())];
        [test_truncated_multi_json]         [r#"{"first json": "valid"} {"second json": "unfinished"#]               [r#"{"first json":"?"} {"second json":"?"..."#]   [Some("unexpected end of JSON input at char position 53".to_owned())];
    )]
    #[test]
    fn test_name() {
        let (res, err) = obf(&[]).obfuscate(input);
        assert_eq!(res, expected);
        assert_eq!(err, expected_error);
    }

    #[test]
    fn test_multiple_json_objects() {
        // Multiple concatenated JSON objects (elasticsearch bulk API pattern).
        let input = r#"{"index":{"_index":"traces","_type":"trace"}} {"value":1,"name":"test"}"#;
        let (result, err) = obf(&[]).obfuscate(input);
        assert_eq!(err, None);
        let mut stream =
            serde_json::Deserializer::from_str(&result).into_iter::<serde_json::Value>();
        let first = stream
            .next()
            .expect("first value")
            .expect("first value is valid JSON");
        let second = stream
            .next()
            .expect("second value")
            .expect("second value is valid JSON");
        assert_eq!(first, json!({"index":{"_index":"?","_type":"?"}}));
        assert_eq!(second, json!({"value":"?","name":"?"}));
    }

    #[test]
    fn test_transform_key_sql_basic() {
        let input = r#"{"query":"select * from table where id = 2","hello":"world","hi":"there"}"#;
        let (result, err) = obf_sql(&["hello"], &["query"]).obfuscate(input);
        assert_eq!(err, None);

        let val: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(val["hello"], json!("world"));
        assert_eq!(val["hi"], json!("?"));
        assert!(
            val["query"].as_str().unwrap().contains('?'),
            "SQL value should be obfuscated"
        );
    }

    #[test]
    fn test_transform_key_with_object_value_falls_through() {
        // An object under a transform key is not transformed; its leaves are obfuscated.
        let input = r#"{"object":{"not a":"query"}}"#;
        let expected = r#"{"object":{"not a":"?"}}"#;
        let (res, err) = obf_sql(&[], &["object"]).obfuscate(input);
        assert_eq!(err, None);

        assert_json_eq(&res, expected);
    }

    #[test]
    fn test_transform_key_with_array_value_falls_through() {
        // An array under a transform key is not transformed; its elements are obfuscated.
        let input = r#"{"object":["not","a","query"]}"#;
        let expected = r#"{"object":["?","?","?"]}"#;
        let (res, err) = obf_sql(&[], &["object"]).obfuscate(input);
        assert_eq!(err, None);

        assert_json_eq(&res, expected);
    }
}