1use crate::obfuscation_config::{JsonObfuscatorConfig, JsonStringTransformer};
5mod scanner;
6use scanner::{Op, Scanner};
7
8pub struct JsonObfuscator {
17 config: JsonObfuscatorConfig,
18}
19
/// Kind of JSON container currently open while scanning.
///
/// The obfuscator keeps a stack of these so it can tell whether the next literal is an object
/// member name or a value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClosureKind {
    /// Inside `[` ... `]`: every literal is a value.
    Array,
    /// Inside `{` ... `}`: the literal before each `:` is a member name.
    Object,
}
24
25impl JsonObfuscator {
26 #[must_use]
27 pub const fn new(config: JsonObfuscatorConfig) -> Self {
28 Self { config }
29 }
30
31 #[must_use]
34 pub fn obfuscate(&self, input: &str) -> (String, Option<String>) {
35 if input.is_empty() {
36 return (String::new(), None);
37 }
38
39 let mut out = String::with_capacity(input.len());
40 let mut scanner = Scanner::new();
41 let mut buf = String::new(); let mut closures: Vec<ClosureKind> = Vec::new();
43 let mut keep_depth: usize = 0;
44 let mut key = false;
45 let mut wiped = false;
46 let mut keeping = false;
47 let mut transforming_value = false;
48
49 for c in input.chars() {
50 let op = scanner.step(c);
51 let depth = closures.len(); match op {
54 Op::BeginObject => {
55 closures.push(ClosureKind::Object);
56 set_key(&closures, &mut key, &mut wiped);
57 transforming_value = false;
58 }
59 Op::BeginArray => {
60 closures.push(ClosureKind::Array);
61 set_key(&closures, &mut key, &mut wiped);
62 transforming_value = false;
63 }
64 Op::EndArray | Op::EndObject => {
65 closures.pop();
66 set_key(&closures, &mut key, &mut wiped);
67 handle_value_done(
68 &mut out,
69 &mut buf,
70 &mut keeping,
71 &mut transforming_value,
72 keep_depth,
73 depth,
74 self.config.transformer.as_ref(),
75 );
76 }
77 Op::ObjectValue | Op::ArrayValue => {
78 set_key(&closures, &mut key, &mut wiped);
79 handle_value_done(
80 &mut out,
81 &mut buf,
82 &mut keeping,
83 &mut transforming_value,
84 keep_depth,
85 depth,
86 self.config.transformer.as_ref(),
87 );
88 }
89 Op::BeginLiteral | Op::Continue => {
90 if transforming_value {
91 buf.push(c);
92 continue;
93 } else if key {
94 buf.push(c);
95 } else if !keeping {
96 if !wiped {
97 out.push_str("\"?\"");
98 wiped = true;
99 }
100 continue;
101 }
102 }
103 Op::ObjectKey => {
104 let k = buf.trim_matches('"');
105 if !keeping && self.config.keep_keys.contains(k) {
106 keeping = true;
107 keep_depth = depth + 1;
108 } else if !transforming_value
109 && self.config.transformer.is_some()
110 && self.config.transform_keys.contains(k)
111 {
112 transforming_value = true;
113 }
114 buf.clear();
115 key = false;
116 }
117 Op::SkipSpace => continue,
118 Op::Error => {
119 out.push_str("...");
120 return (out, scanner.err);
121 }
122 Op::End => {} }
124
125 out.push(c);
126 }
127
128 if scanner.eof() == Op::Error {
129 out.push_str("...");
130 }
131 (out, scanner.err)
132 }
133}
134
135fn set_key(closures: &[ClosureKind], key: &mut bool, wiped: &mut bool) {
138 let n = closures.len();
139 *key = n == 0 || matches!(closures[n - 1], ClosureKind::Object);
140 *wiped = false;
141}
142
143fn handle_value_done(
146 out: &mut String,
147 buf: &mut String,
148 keeping: &mut bool,
149 transforming_value: &mut bool,
150 keep_depth: usize,
151 depth: usize,
152 transformer: Option<&JsonStringTransformer>,
153) {
154 if *transforming_value {
155 if let Some(t) = transformer {
156 let raw: String =
158 serde_json::from_str(buf).unwrap_or_else(|_| buf.trim_matches('"').to_string());
159 let result = t(&raw);
160 out.push('"');
161 out.push_str(&result);
162 out.push('"');
163 *transforming_value = false;
164 buf.clear();
165 }
166 } else if *keeping && depth < keep_depth {
167 *keeping = false;
168 }
169}
170
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;
    use serde_json::json;

    use super::JsonObfuscator;
    use crate::{obfuscation_config::JsonObfuscatorConfig, sql::obfuscate_sql_string};

    /// Converts borrowed key names into whichever owned collection the config field expects.
    fn owned_keys<C: std::iter::FromIterator<String>>(keys: &[&str]) -> C {
        keys.iter().map(|key| (*key).to_owned()).collect()
    }

    /// Obfuscator that keeps the values of `keep_keys` and has no transformer.
    fn obf(keep_keys: &[&str]) -> JsonObfuscator {
        JsonObfuscator::new(JsonObfuscatorConfig {
            enabled: true,
            keep_keys: owned_keys(keep_keys),
            ..Default::default()
        })
    }

    /// Obfuscator that keeps `keep_keys` and SQL-obfuscates the values of `transform_keys`.
    fn obf_sql(keep_keys: &[&str], transform_keys: &[&str]) -> JsonObfuscator {
        JsonObfuscator::new(JsonObfuscatorConfig {
            enabled: true,
            keep_keys: owned_keys(keep_keys),
            transform_keys: owned_keys(transform_keys),
            transformer: Some(obfuscate_sql_string),
        })
    }

    /// Compares two JSON documents structurally, ignoring formatting differences.
    fn assert_json_eq(result: &str, expected: &str) {
        let result: serde_json::Value =
            serde_json::from_str(result).expect("result is not valid JSON");
        let expected: serde_json::Value =
            serde_json::from_str(expected).expect("expected is not valid JSON");
        assert_eq!(result, expected);
    }

    // Well-formed inputs: no error is expected and the output is compared structurally.
    // NOTE(review): the literals in `test_emoji_object` look mis-encoded; they are identical in
    // the key list, the input and the expectation, so they are left untouched here.
    #[duplicate_item(
        test_name keep_keys input expected;
        [test_empty_object] [&[]] ["{}"] ["{}"];
        [test_empty_array] [&[]] ["[]"] ["[]"];
        [test_emoji_object] [&["๐ต"]] [r#"{"๐ต":"๐"}"#] [r#"{"๐ต":"๐"}"#];
        [test_nested_empty_objects] [&[]] [r#"{"a":{},"b":{"c":{}}}"#] [r#"{"a":{},"b":{"c":{}}}"#];
        [test_boolean_and_null_obfuscated] [&[]] [r#"{"a":true,"b":false,"c":null}"#] [r#"{"a":"?","b":"?","c":"?"}"#];
        [test_all_values_obfuscated] [&[]] [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#] [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["?","?",{"k":"?"}]},"?"]}}}"#];
        [test_numbers_obfuscated] [&[]] [r#"{"highlight":{"pre_tags":["<em>"],"post_tags":["</em>"],"index":1}}"#] [r#"{"highlight":{"pre_tags":["?"],"post_tags":["?"],"index":"?"}}"#];
        [test_keep_key_keeps_entire_value] [&["other"]] [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#] [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["1","2",{"k":"v"}]},"?"]}}}"#];
        [test_keep_key_nested_array_fully_kept] [&["fields"]] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#];
        [test_keep_key_deep_nested] [&["k"]] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#] [r#"{"fields":["?",{"key":"?","other":["?","?",{"k":"v"}]},"?"]}"#];
        [test_keep_key_in_nested_object] [&["C"]] [r#"{"fields":[{"A":1,"B":{"C":3}},"2"]}"#] [r#"{"fields":[{"A":"?","B":{"C":3}},"?"]}"#];
        [test_keep_key_large_nested_structure] [&["hits"]] [r#"{"outer":{"total":2,"max_score":0.9105287,"hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#] [r#"{"outer":{"total":"?","max_score":"?","hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#];
        [test_keep_multiple_keys] [&["_index","title"]] [r#"{"hits":[{"_index":"bookdb_index","_type":"book","_score":0.9,"_source":{"summary":"text","title":"ES in Action","publish_date":"2015-12-03"},"highlight":{"title":["ES Action"]}}]}"#] [r#"{"hits":[{"_index":"bookdb_index","_type":"?","_score":"?","_source":{"summary":"?","title":"ES in Action","publish_date":"?"},"highlight":{"title":["ES Action"]}}]}"#];
        [test_keep_key_wallet] [&["company_wallet_configuration_id"]] [r#"{"email":"dev@datadoghq.com","company_wallet_configuration_id":1}"#] [r#"{"email":"?","company_wallet_configuration_id":1}"#];
    )]
    #[test]
    fn test_name() {
        let (res, err) = obf(keep_keys).obfuscate(input);
        assert_eq!(err, None);
        assert_json_eq(&res, expected);
    }

    // Empty, invalid and truncated inputs: output and error message are compared exactly.
    // NOTE(review): the `test_invalid_unicode` literal looks mis-encoded (it is two characters,
    // while the other cases show a single offending character in the message). The intended
    // character cannot be recovered from this file - TODO restore it from the original source.
    #[duplicate_item(
        test_name input expected expected_error;
        [test_empty_input] [""] [""] [None];
        [test_invalid_emoji] ["🤨"] ["..."] [Some("invalid character '🤨' looking for beginning of value".to_owned())];
        [test_invalid_unicode] ["แธ"] ["..."] [Some("invalid character 'แธ' looking for beginning of value".to_owned())];
        [test_invalid_json_appends_ellipsis] ["INVALID"] ["..."] [Some("invalid character 'I' looking for beginning of value".to_owned())];
        [test_invalid_single_char] [")"] ["..."] [Some("invalid character ')' looking for beginning of value".to_owned())];
        [test_truncated_open_value_string] [r#"{"query":""#] [r#"{"query":"?"..."#] [Some("unexpected end of JSON input at char position 11".to_owned())];
        [test_truncated_multi_json] [r#"{"first json": "valid"} {"second json": "unfinished"#] [r#"{"first json":"?"} {"second json":"?"..."#] [Some("unexpected end of JSON input at char position 53".to_owned())];
    )]
    #[test]
    fn test_name() {
        let (res, err) = obf(&[]).obfuscate(input);
        assert_eq!(res, expected);
        assert_eq!(err, expected_error);
    }

    /// Two documents separated by whitespace are both obfuscated.
    #[test]
    fn test_multiple_json_objects() {
        let input = r#"{"index":{"_index":"traces","_type":"trace"}} {"value":1,"name":"test"}"#;
        let (result, err) = obf(&[]).obfuscate(input);
        assert_eq!(err, None);
        let mut stream =
            serde_json::Deserializer::from_str(&result).into_iter::<serde_json::Value>();
        let first = stream
            .next()
            .expect("first value")
            .expect("first value is valid JSON");
        let second = stream
            .next()
            .expect("second value")
            .expect("second value is valid JSON");
        assert_eq!(first, json!({"index":{"_index":"?","_type":"?"}}));
        assert_eq!(second, json!({"value":"?","name":"?"}));
    }

    /// A transform key's string value goes through the SQL obfuscator; kept keys stay intact.
    #[test]
    fn test_transform_key_sql_basic() {
        let input = r#"{"query":"select * from table where id = 2","hello":"world","hi":"there"}"#;
        let (result, err) = obf_sql(&["hello"], &["query"]).obfuscate(input);
        assert_eq!(err, None);

        let val: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(val["hello"], json!("world"));
        assert_eq!(val["hi"], json!("?"));
        assert!(
            val["query"].as_str().unwrap().contains('?'),
            "SQL value should be obfuscated"
        );
    }

    /// An object under a transform key is obfuscated like any other object.
    #[test]
    fn test_transform_key_with_object_value_falls_through() {
        let input = r#"{"object":{"not a":"query"}}"#;
        let expected = r#"{"object":{"not a":"?"}}"#;
        let (res, err) = obf_sql(&[], &["object"]).obfuscate(input);
        assert_eq!(err, None);

        assert_json_eq(&res, expected);
    }

    /// An array under a transform key is obfuscated like any other array.
    #[test]
    fn test_transform_key_with_array_value_falls_through() {
        let input = r#"{"object":["not","a","query"]}"#;
        let expected = r#"{"object":["?","?","?"]}"#;
        let (res, err) = obf_sql(&[], &["object"]).obfuscate(input);
        assert_eq!(err, None);

        assert_json_eq(&res, expected);
    }
}