1use crate::json_scanner::{Op, Scanner};
5use crate::obfuscation_config::{JsonObfuscatorConfig, JsonStringTransformer};
6
/// JSON obfuscator driven by a [`JsonObfuscatorConfig`].
///
/// The configuration is stored at construction time and read on every call to
/// `obfuscate`.
pub struct JsonObfuscator {
    /// Settings read while obfuscating: `keep_keys`, `transform_keys` and `transformer`.
    config: JsonObfuscatorConfig,
}
17
/// Kind of JSON container currently open while scanning.
///
/// The obfuscator keeps a stack of these; the innermost entry decides whether the next
/// literal is an object key (`Object`) or a plain value (`Array`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ClosureKind {
    /// Inside `[...]`: every literal is a value.
    Array,
    /// Inside `{...}`: the next literal is a key until the key has been consumed.
    Object,
}
22
23impl JsonObfuscator {
24 pub fn new(config: JsonObfuscatorConfig) -> Self {
25 Self { config }
26 }
27
28 pub fn obfuscate(&self, input: &str) -> (String, Option<String>) {
31 if input.is_empty() {
32 return (String::new(), None);
33 }
34
35 let mut out = String::with_capacity(input.len());
36 let mut scanner = Scanner::new();
37 let mut buf = String::new(); let mut closures: Vec<ClosureKind> = Vec::new();
39 let mut keep_depth: usize = 0;
40 let mut key = false;
41 let mut wiped = false;
42 let mut keeping = false;
43 let mut transforming_value = false;
44
45 for c in input.chars() {
46 let op = scanner.step(c);
47 let depth = closures.len(); match op {
50 Op::BeginObject => {
51 closures.push(ClosureKind::Object);
52 set_key(&closures, &mut key, &mut wiped);
53 transforming_value = false;
54 }
55 Op::BeginArray => {
56 closures.push(ClosureKind::Array);
57 set_key(&closures, &mut key, &mut wiped);
58 transforming_value = false;
59 }
60 Op::EndArray | Op::EndObject => {
61 closures.pop();
62 set_key(&closures, &mut key, &mut wiped);
63 handle_value_done(
64 &mut out,
65 &mut buf,
66 &mut keeping,
67 &mut transforming_value,
68 &mut keep_depth,
69 depth,
70 self.config.transformer.as_ref(),
71 );
72 }
73 Op::ObjectValue | Op::ArrayValue => {
74 set_key(&closures, &mut key, &mut wiped);
75 handle_value_done(
76 &mut out,
77 &mut buf,
78 &mut keeping,
79 &mut transforming_value,
80 &mut keep_depth,
81 depth,
82 self.config.transformer.as_ref(),
83 );
84 }
85 Op::BeginLiteral | Op::Continue => {
86 if transforming_value {
87 buf.push(c);
88 continue;
89 } else if key {
90 buf.push(c);
91 } else if !keeping {
92 if !wiped {
93 out.push_str("\"?\"");
94 wiped = true;
95 }
96 continue;
97 }
98 }
99 Op::ObjectKey => {
100 let k = buf.trim_matches('"');
101 if !keeping && self.config.keep_keys.contains(k) {
102 keeping = true;
103 keep_depth = depth + 1;
104 } else if !transforming_value
105 && self.config.transformer.is_some()
106 && self.config.transform_keys.contains(k)
107 {
108 transforming_value = true;
109 }
110 buf.clear();
111 key = false;
112 }
113 Op::SkipSpace => continue,
114 Op::Error => {
115 out.push_str("...");
116 return (out, scanner.err);
117 }
118 Op::End => {} }
120
121 out.push(c);
122 }
123
124 if scanner.eof() == Op::Error {
125 out.push_str("...");
126 }
127 (out, scanner.err)
128 }
129}
130
131fn set_key(closures: &[ClosureKind], key: &mut bool, wiped: &mut bool) {
134 let n = closures.len();
135 *key = n == 0 || matches!(closures[n - 1], ClosureKind::Object);
136 *wiped = false;
137}
138
139fn handle_value_done(
142 out: &mut String,
143 buf: &mut String,
144 keeping: &mut bool,
145 transforming_value: &mut bool,
146 keep_depth: &mut usize,
147 depth: usize,
148 transformer: Option<&JsonStringTransformer>,
149) {
150 if *transforming_value {
151 if let Some(t) = transformer {
152 let raw: String =
154 serde_json::from_str(buf).unwrap_or_else(|_| buf.trim_matches('"').to_string());
155 let result = t(&raw);
156 out.push('"');
157 out.push_str(&result);
158 out.push('"');
159 *transforming_value = false;
160 buf.clear();
161 }
162 } else if *keeping && depth < *keep_depth {
163 *keeping = false;
164 }
165}
166
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;
    use serde_json::json;

    use super::JsonObfuscator;
    use crate::{obfuscation_config::JsonObfuscatorConfig, sql::obfuscate_sql_string};

    /// Builds an obfuscator that keeps `keep_keys` and leaves every other setting at its
    /// default.
    fn obf(keep_keys: &[&str]) -> JsonObfuscator {
        let config = JsonObfuscatorConfig {
            enabled: true,
            keep_keys: keep_keys.iter().copied().map(String::from).collect(),
            ..Default::default()
        };
        JsonObfuscator::new(config)
    }

    /// Builds an obfuscator that keeps `keep_keys` and runs the SQL obfuscator on the values
    /// of `transform_keys`.
    fn obf_sql(keep_keys: &[&str], transform_keys: &[&str]) -> JsonObfuscator {
        let config = JsonObfuscatorConfig {
            enabled: true,
            keep_keys: keep_keys.iter().copied().map(String::from).collect(),
            transform_keys: transform_keys.iter().copied().map(String::from).collect(),
            transformer: Some(obfuscate_sql_string),
        };
        JsonObfuscator::new(config)
    }

    /// Asserts that both strings parse as JSON and denote the same value.
    fn assert_json_eq(result: &str, expected: &str) {
        let actual =
            serde_json::from_str::<serde_json::Value>(result).expect("result is not valid JSON");
        let wanted = serde_json::from_str::<serde_json::Value>(expected)
            .expect("expected is not valid JSON");
        assert_eq!(actual, wanted);
    }

    // Valid inputs: one generated test per row, comparing the output with `expected` as
    // parsed JSON.
    // NOTE(review): the non-ASCII literals in this table and the next one look mis-decoded
    // (several characters where a single code point seems intended); confirm the file
    // encoding.
    #[duplicate_item(
        test_name keep_keys input expected;
        [test_empty_object] [&[]] ["{}"] ["{}"];
        [test_empty_array] [&[]] ["[]"] ["[]"];
        [test_emoji_object] [&["๐ต"]] [r#"{"๐ต":"๐"}"#] [r#"{"๐ต":"๐"}"#];
        [test_nested_empty_objects] [&[]] [r#"{"a":{},"b":{"c":{}}}"#] [r#"{"a":{},"b":{"c":{}}}"#];
        [test_boolean_and_null_obfuscated][&[]] [r#"{"a":true,"b":false,"c":null}"#] [r#"{"a":"?","b":"?","c":"?"}"#];
        [test_all_values_obfuscated] [&[]] [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#] [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["?","?",{"k":"?"}]},"?"]}}}"#];
        [test_numbers_obfuscated] [&[]] [r#"{"highlight":{"pre_tags":["<em>"],"post_tags":["</em>"],"index":1}}"#] [r#"{"highlight":{"pre_tags":["?"],"post_tags":["?"],"index":"?"}}"#];
        [test_keep_key_keeps_entire_value][&["other"]] [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#] [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["1","2",{"k":"v"}]},"?"]}}}"#];
        [test_keep_key_nested_array_fully_kept][&["fields"]] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#];
        [test_keep_key_deep_nested] [&["k"]] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#] [r#"{"fields":["?",{"key":"?","other":["?","?",{"k":"v"}]},"?"]}"#];
        [test_keep_key_in_nested_object] [&["C"]] [r#"{"fields":[{"A":1,"B":{"C":3}},"2"]}"#] [r#"{"fields":[{"A":"?","B":{"C":3}},"?"]}"#];
        [test_keep_key_large_nested_structure][&["hits"]] [r#"{"outer":{"total":2,"max_score":0.9105287,"hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#] [r#"{"outer":{"total":"?","max_score":"?","hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#];
        [test_keep_multiple_keys] [&["_index","title"]][r#"{"hits":[{"_index":"bookdb_index","_type":"book","_score":0.9,"_source":{"summary":"text","title":"ES in Action","publish_date":"2015-12-03"},"highlight":{"title":["ES Action"]}}]}"#] [r#"{"hits":[{"_index":"bookdb_index","_type":"?","_score":"?","_source":{"summary":"?","title":"ES in Action","publish_date":"?"},"highlight":{"title":["ES Action"]}}]}"#];
        [test_keep_key_wallet] [&["company_wallet_configuration_id"]] [r#"{"email":"dev@datadoghq.com","company_wallet_configuration_id":1}"#] [r#"{"email":"?","company_wallet_configuration_id":1}"#];
    )]
    #[test]
    fn test_name() {
        let (output, error) = obf(keep_keys).obfuscate(input);
        assert_eq!(error, None);
        assert_json_eq(&output, expected);
    }

    // Empty, invalid or truncated inputs: the raw output text and the reported error are
    // both compared exactly.
    #[duplicate_item(
        test_name input expected expected_error;
        [test_empty_input] [""] [""] [None];
        [test_invalid_emoji] ["๐คจ"] ["..."] [Some("invalid character '๐คจ' looking for beginning of value".to_owned())];
        [test_invalid_unicode] ["แธ"] ["..."] [Some("invalid character 'แธ' looking for beginning of value".to_owned())];
        [test_invalid_json_appends_ellipsis]["INVALID"] ["..."] [Some("invalid character 'I' looking for beginning of value".to_owned())];
        [test_invalid_single_char] [")"] ["..."] [Some("invalid character ')' looking for beginning of value".to_owned())];
        [test_truncated_open_value_string] [r#"{"query":""#] [r#"{"query":"?"..."#] [Some("unexpected end of JSON input at char position 11".to_owned())];
        [test_truncated_multi_json] [r#"{"first json": "valid"} {"second json": "unfinished"#] [r#"{"first json":"?"} {"second json":"?"..."#] [Some("unexpected end of JSON input at char position 53".to_owned())];
    )]
    #[test]
    fn test_name() {
        let (output, error) = obf(&[]).obfuscate(input);
        assert_eq!(output, expected);
        assert_eq!(error, expected_error);
    }

    /// Two whitespace-separated top-level documents are both obfuscated.
    #[test]
    fn test_multiple_json_objects() {
        let (output, error) = obf(&[]).obfuscate(
            r#"{"index":{"_index":"traces","_type":"trace"}} {"value":1,"name":"test"}"#,
        );
        assert_eq!(error, None);

        // The output is a stream of documents, so it is read back one value at a time.
        let mut values =
            serde_json::Deserializer::from_str(&output).into_iter::<serde_json::Value>();
        let first = values
            .next()
            .expect("first value")
            .expect("first value is valid JSON");
        let second = values
            .next()
            .expect("second value")
            .expect("second value is valid JSON");

        assert_eq!(first, json!({"index":{"_index":"?","_type":"?"}}));
        assert_eq!(second, json!({"value":"?","name":"?"}));
    }

    /// A transform key's string value goes through the SQL obfuscator while keep keys and
    /// ordinary keys behave as usual.
    #[test]
    fn test_transform_key_sql_basic() {
        let (output, error) = obf_sql(&["hello"], &["query"]).obfuscate(
            r#"{"query":"select * from table where id = 2","hello":"world","hi":"there"}"#,
        );
        assert_eq!(error, None);

        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
        assert_eq!(parsed["hello"], json!("world"));
        assert_eq!(parsed["hi"], json!("?"));
        assert!(
            parsed["query"].as_str().unwrap().contains('?'),
            "SQL value should be obfuscated"
        );
    }

    /// An object under a transform key is not transformed; its contents are obfuscated
    /// normally.
    #[test]
    fn test_transform_key_with_object_value_falls_through() {
        let (output, error) =
            obf_sql(&[], &["object"]).obfuscate(r#"{"object":{"not a":"query"}}"#);
        assert_eq!(error, None);

        assert_json_eq(&output, r#"{"object":{"not a":"?"}}"#);
    }

    /// An array under a transform key is not transformed; its elements are obfuscated
    /// normally.
    #[test]
    fn test_transform_key_with_array_value_falls_through() {
        let (output, error) =
            obf_sql(&[], &["object"]).obfuscate(r#"{"object":["not","a","query"]}"#);
        assert_eq!(error, None);

        assert_json_eq(&output, r#"{"object":["?","?","?"]}"#);
    }
}