1use std::collections::HashSet;
5
6use crate::json_scanner::{Op, Scanner};
7
/// Boxed callback used to rewrite the value of a key listed in
/// `transform_keys`: it is handed the value text and returns the replacement
/// text. The `Send + Sync` bounds let an obfuscator holding one be shared
/// across threads.
type Transformer = Box<dyn Fn(&str) -> String + Send + Sync>;
9
10pub struct JsonObfuscator {
18 keep_keys: HashSet<String>,
19 transform_keys: HashSet<String>,
20 transformer: Option<Transformer>,
21}
22
/// Kind of JSON container currently open; the obfuscator keeps a stack of
/// these to know whether the next literal is an object key or a value.
enum ClosureKind {
    /// Inside `[...]`: every literal is a value.
    Array,
    /// Inside `{...}`: a literal at the start of a member is a key.
    Object,
}
27
28impl JsonObfuscator {
29 pub fn new(
30 keep_keys: impl IntoIterator<Item = String>,
31 transform_keys: impl IntoIterator<Item = String>,
32 transformer: Option<Transformer>,
33 ) -> Self {
34 Self {
35 keep_keys: keep_keys.into_iter().collect(),
36 transform_keys: transform_keys.into_iter().collect(),
37 transformer,
38 }
39 }
40
41 pub fn obfuscate(&self, input: &str) -> (String, Option<String>) {
44 if input.is_empty() {
45 return (String::new(), None);
46 }
47
48 let mut out = String::with_capacity(input.len());
49 let mut scanner = Scanner::new();
50 let mut buf = String::new(); let mut closures: Vec<ClosureKind> = Vec::new();
52 let mut keep_depth: usize = 0;
53 let mut key = false;
54 let mut wiped = false;
55 let mut keeping = false;
56 let mut transforming_value = false;
57
58 for c in input.chars() {
59 let op = scanner.step(c);
60 let depth = closures.len(); match op {
63 Op::BeginObject => {
64 closures.push(ClosureKind::Object);
65 set_key(&closures, &mut key, &mut wiped);
66 transforming_value = false;
67 }
68 Op::BeginArray => {
69 closures.push(ClosureKind::Array);
70 set_key(&closures, &mut key, &mut wiped);
71 transforming_value = false;
72 }
73 Op::EndArray | Op::EndObject => {
74 closures.pop();
75 set_key(&closures, &mut key, &mut wiped);
76 handle_value_done(
77 &mut out,
78 &mut buf,
79 &mut keeping,
80 &mut transforming_value,
81 &mut keep_depth,
82 depth,
83 self.transformer.as_deref(),
84 );
85 }
86 Op::ObjectValue | Op::ArrayValue => {
87 set_key(&closures, &mut key, &mut wiped);
88 handle_value_done(
89 &mut out,
90 &mut buf,
91 &mut keeping,
92 &mut transforming_value,
93 &mut keep_depth,
94 depth,
95 self.transformer.as_deref(),
96 );
97 }
98 Op::BeginLiteral | Op::Continue => {
99 if transforming_value {
100 buf.push(c);
101 continue;
102 } else if key {
103 buf.push(c);
104 } else if !keeping {
105 if !wiped {
106 out.push_str("\"?\"");
107 wiped = true;
108 }
109 continue;
110 }
111 }
112 Op::ObjectKey => {
113 let k = buf.trim_matches('"');
114 if !keeping && self.keep_keys.contains(k) {
115 keeping = true;
116 keep_depth = depth + 1;
117 } else if !transforming_value
118 && self.transformer.is_some()
119 && self.transform_keys.contains(k)
120 {
121 transforming_value = true;
122 }
123 buf.clear();
124 key = false;
125 }
126 Op::SkipSpace => continue,
127 Op::Error => {
128 out.push_str("...");
129 return (out, scanner.err);
130 }
131 Op::End => {} }
133
134 out.push(c);
135 }
136
137 if scanner.eof() == Op::Error {
138 out.push_str("...");
139 }
140 (out, scanner.err)
141 }
142}
143
144fn set_key(closures: &[ClosureKind], key: &mut bool, wiped: &mut bool) {
147 let n = closures.len();
148 *key = n == 0 || matches!(closures[n - 1], ClosureKind::Object);
149 *wiped = false;
150}
151
152fn handle_value_done(
155 out: &mut String,
156 buf: &mut String,
157 keeping: &mut bool,
158 transforming_value: &mut bool,
159 keep_depth: &mut usize,
160 depth: usize,
161 transformer: Option<&(dyn Fn(&str) -> String + Send + Sync)>,
162) {
163 if *transforming_value {
164 if let Some(t) = transformer {
165 let raw: String =
167 serde_json::from_str(buf).unwrap_or_else(|_| buf.trim_matches('"').to_string());
168 let result = t(&raw);
169 out.push('"');
170 out.push_str(&result);
171 out.push('"');
172 *transforming_value = false;
173 buf.clear();
174 }
175 } else if *keeping && depth < *keep_depth {
176 *keeping = false;
177 }
178}
179
#[cfg(test)]
mod tests {
    use duplicate::duplicate_item;
    use serde_json::json;

    use super::JsonObfuscator;
    use crate::sql::obfuscate_sql_string;

    /// Obfuscator with the given keep keys and no value transformer.
    fn obf(keep_keys: &[&str]) -> JsonObfuscator {
        JsonObfuscator::new(keep_keys.iter().map(|s| s.to_string()), [], None)
    }

    /// Obfuscator that runs the SQL obfuscator on values of `transform_keys`.
    fn obf_sql(keep_keys: &[&str], transform_keys: &[&str]) -> JsonObfuscator {
        JsonObfuscator::new(
            keep_keys.iter().map(|s| s.to_string()),
            transform_keys.iter().map(|s| s.to_string()),
            Some(Box::new(obfuscate_sql_string)),
        )
    }

    /// Compares two JSON documents structurally, ignoring formatting.
    fn assert_json_eq(result: &str, expected: &str) {
        let result: serde_json::Value =
            serde_json::from_str(result).expect("result is not valid JSON");
        let expected: serde_json::Value =
            serde_json::from_str(expected).expect("expected is not valid JSON");
        assert_eq!(result, expected);
    }

    // Valid inputs: the output must parse and equal `expected` structurally.
    #[duplicate_item(
        test_name keep_keys input expected;
        [test_empty_object] [&[]] ["{}"] ["{}"];
        [test_empty_array] [&[]] ["[]"] ["[]"];
        [test_emoji_object] [&["🐵"]] [r#"{"🐵":"🙈"}"#] [r#"{"🐵":"🙈"}"#];
        [test_nested_empty_objects] [&[]] [r#"{"a":{},"b":{"c":{}}}"#] [r#"{"a":{},"b":{"c":{}}}"#];
        [test_boolean_and_null_obfuscated] [&[]] [r#"{"a":true,"b":false,"c":null}"#] [r#"{"a":"?","b":"?","c":"?"}"#];
        [test_all_values_obfuscated] [&[]] [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#] [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["?","?",{"k":"?"}]},"?"]}}}"#];
        [test_numbers_obfuscated] [&[]] [r#"{"highlight":{"pre_tags":["<em>"],"post_tags":["</em>"],"index":1}}"#] [r#"{"highlight":{"pre_tags":["?"],"post_tags":["?"],"index":"?"}}"#];
        [test_keep_key_keeps_entire_value] [&["other"]] [r#"{"query":{"multi_match":{"query":"guide","fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}}}"#] [r#"{"query":{"multi_match":{"query":"?","fields":["?",{"key":"?","other":["1","2",{"k":"v"}]},"?"]}}}"#];
        [test_keep_key_nested_array_fully_kept] [&["fields"]] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#];
        [test_keep_key_deep_nested] [&["k"]] [r#"{"fields":["_all",{"key":"value","other":["1","2",{"k":"v"}]},"2"]}"#] [r#"{"fields":["?",{"key":"?","other":["?","?",{"k":"v"}]},"?"]}"#];
        [test_keep_key_in_nested_object] [&["C"]] [r#"{"fields":[{"A":1,"B":{"C":3}},"2"]}"#] [r#"{"fields":[{"A":"?","B":{"C":3}},"?"]}"#];
        [test_keep_key_large_nested_structure] [&["hits"]] [r#"{"outer":{"total":2,"max_score":0.9105287,"hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#] [r#"{"outer":{"total":"?","max_score":"?","hits":[{"_index":"bookdb_index","_score":0.9105287}]}}"#];
        [test_keep_multiple_keys] [&["_index","title"]] [r#"{"hits":[{"_index":"bookdb_index","_type":"book","_score":0.9,"_source":{"summary":"text","title":"ES in Action","publish_date":"2015-12-03"},"highlight":{"title":["ES Action"]}}]}"#] [r#"{"hits":[{"_index":"bookdb_index","_type":"?","_score":"?","_source":{"summary":"?","title":"ES in Action","publish_date":"?"},"highlight":{"title":["ES Action"]}}]}"#];
        [test_keep_key_wallet] [&["company_wallet_configuration_id"]] [r#"{"email":"dev@datadoghq.com","company_wallet_configuration_id":1}"#] [r#"{"email":"?","company_wallet_configuration_id":1}"#];
    )]
    #[test]
    fn test_name() {
        let (res, err) = obf(keep_keys).obfuscate(input);
        assert_eq!(err, None);
        assert_json_eq(&res, expected);
    }

    // Empty, invalid and truncated inputs: exact output text and error message.
    // The non-ASCII inputs are single code points, because the error message
    // quotes only the first offending character (see the "INVALID" row).
    #[duplicate_item(
        test_name input expected expected_error;
        [test_empty_input] [""] [""] [None];
        [test_invalid_emoji] ["🤨"] ["..."] [Some("invalid character '🤨' looking for beginning of value".to_owned())];
        [test_invalid_unicode] ["ḍ"] ["..."] [Some("invalid character 'ḍ' looking for beginning of value".to_owned())];
        [test_invalid_json_appends_ellipsis] ["INVALID"] ["..."] [Some("invalid character 'I' looking for beginning of value".to_owned())];
        [test_invalid_single_char] [")"] ["..."] [Some("invalid character ')' looking for beginning of value".to_owned())];
        [test_truncated_open_value_string] [r#"{"query":""#] [r#"{"query":"?"..."#] [Some("unexpected end of JSON input at char position 11".to_owned())];
        [test_truncated_multi_json] [r#"{"first json": "valid"} {"second json": "unfinished"#] [r#"{"first json":"?"} {"second json":"?"..."#] [Some("unexpected end of JSON input at char position 53".to_owned())];
    )]
    #[test]
    fn test_name() {
        let (res, err) = obf(&[]).obfuscate(input);
        assert_eq!(res, expected);
        assert_eq!(err, expected_error);
    }

    /// Two whitespace-separated documents are both obfuscated and both remain
    /// individually parseable.
    #[test]
    fn test_multiple_json_objects() {
        let input = r#"{"index":{"_index":"traces","_type":"trace"}} {"value":1,"name":"test"}"#;
        let (result, err) = obf(&[]).obfuscate(input);
        assert_eq!(err, None);
        let mut stream =
            serde_json::Deserializer::from_str(&result).into_iter::<serde_json::Value>();
        let first = stream
            .next()
            .expect("first value")
            .expect("first value is valid JSON");
        let second = stream
            .next()
            .expect("second value")
            .expect("second value is valid JSON");
        assert_eq!(first, json!({"index":{"_index":"?","_type":"?"}}));
        assert_eq!(second, json!({"value":"?","name":"?"}));
    }

    /// A transform key's string value goes through the SQL obfuscator, while
    /// kept and ordinary keys behave as usual.
    #[test]
    fn test_transform_key_sql_basic() {
        let input = r#"{"query":"select * from table where id = 2","hello":"world","hi":"there"}"#;
        let (result, err) = obf_sql(&["hello"], &["query"]).obfuscate(input);
        assert_eq!(err, None);

        let val: serde_json::Value = serde_json::from_str(&result).unwrap();
        assert_eq!(val["hello"], json!("world"));
        assert_eq!(val["hi"], json!("?"));
        assert!(
            val["query"].as_str().unwrap().contains('?'),
            "SQL value should be obfuscated"
        );
    }

    /// An object under a transform key is not transformed; its contents are
    /// obfuscated normally.
    #[test]
    fn test_transform_key_with_object_value_falls_through() {
        let input = r#"{"object":{"not a":"query"}}"#;
        let expected = r#"{"object":{"not a":"?"}}"#;
        let (res, err) = obf_sql(&[], &["object"]).obfuscate(input);
        assert_eq!(err, None);

        assert_json_eq(&res, expected);
    }

    /// An array under a transform key is not transformed; its elements are
    /// obfuscated normally.
    #[test]
    fn test_transform_key_with_array_value_falls_through() {
        let input = r#"{"object":["not","a","query"]}"#;
        let expected = r#"{"object":["?","?","?"]}"#;
        let (res, err) = obf_sql(&[], &["object"]).obfuscate(input);
        assert_eq!(err, None);

        assert_json_eq(&res, expected);
    }
}