1use cloakrs_core::{PiiEntity, Result, Scanner};
4use serde::{Deserialize, Serialize};
5use serde_json::{Map, Value};
6
/// Path filters that decide which JSON string values are handed to the scanner.
///
/// Paths are rooted at `$`; object members are appended as `.key` and array
/// elements as `[index]`, e.g. `$.records[0].email`. A pattern either equals a
/// path exactly or uses `[*]` as a wildcard intended for array indices.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct JsonScanOptions {
    /// Patterns selecting the paths to scan. When empty, every path is a
    /// candidate; otherwise a path must match at least one of these patterns.
    pub include_paths: Vec<String>,
    /// Patterns selecting paths that are never scanned. A match here wins over
    /// any match in `include_paths`.
    pub exclude_paths: Vec<String>,
}
15
/// Scan outcome for a single JSON string value in which the scanner reported
/// at least one finding.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct JsonStringScanResult {
    /// `$`-rooted location of the string inside the document, built from
    /// `.key` and `[index]` segments.
    pub path: String,
    /// Entities the scanner reported for this string.
    pub findings: Vec<PiiEntity>,
    /// Masked text returned by the scanner for this string, if it produced one.
    pub masked_value: Option<String>,
}
26
/// Aggregate result of scanning a whole JSON document.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct JsonScanResult {
    /// One entry per scanned string value that had findings, in traversal order.
    pub strings: Vec<JsonStringScanResult>,
    /// Copy of the input document in which scanned string values are replaced
    /// by the scanner's masked text whenever it supplied one; all other values
    /// are copied unchanged.
    pub masked_json: Value,
}
35
36pub fn scan_json_str(
65 input: &str,
66 scanner: &Scanner,
67 options: &JsonScanOptions,
68) -> Result<JsonScanResult> {
69 let value: Value = serde_json::from_str(input)?;
70 scan_json_value(&value, scanner, options)
71}
72
73pub fn scan_json_value(
75 value: &Value,
76 scanner: &Scanner,
77 options: &JsonScanOptions,
78) -> Result<JsonScanResult> {
79 let mut strings = Vec::new();
80 let masked_json = scan_value(value, "$", scanner, options, &mut strings)?;
81 Ok(JsonScanResult {
82 strings,
83 masked_json,
84 })
85}
86
87fn scan_value(
88 value: &Value,
89 path: &str,
90 scanner: &Scanner,
91 options: &JsonScanOptions,
92 strings: &mut Vec<JsonStringScanResult>,
93) -> Result<Value> {
94 match value {
95 Value::String(text) => scan_string(text, path, scanner, options, strings),
96 Value::Array(items) => {
97 let mut masked = Vec::with_capacity(items.len());
98 for (index, item) in items.iter().enumerate() {
99 masked.push(scan_value(
100 item,
101 &format!("{path}[{index}]"),
102 scanner,
103 options,
104 strings,
105 )?);
106 }
107 Ok(Value::Array(masked))
108 }
109 Value::Object(map) => {
110 let mut masked = Map::with_capacity(map.len());
111 for (key, item) in map {
112 masked.insert(
113 key.clone(),
114 scan_value(item, &format!("{path}.{key}"), scanner, options, strings)?,
115 );
116 }
117 Ok(Value::Object(masked))
118 }
119 _ => Ok(value.clone()),
120 }
121}
122
123fn scan_string(
124 text: &str,
125 path: &str,
126 scanner: &Scanner,
127 options: &JsonScanOptions,
128 strings: &mut Vec<JsonStringScanResult>,
129) -> Result<Value> {
130 if !path_allowed(path, options) {
131 return Ok(Value::String(text.to_string()));
132 }
133
134 let scan = scanner.scan(text)?;
135 let masked_value = scan.masked_text.clone();
136 if !scan.findings.is_empty() {
137 strings.push(JsonStringScanResult {
138 path: path.to_string(),
139 findings: scan.findings,
140 masked_value: masked_value.clone(),
141 });
142 }
143 Ok(Value::String(
144 masked_value.unwrap_or_else(|| text.to_string()),
145 ))
146}
147
148fn path_allowed(path: &str, options: &JsonScanOptions) -> bool {
149 let included = options.include_paths.is_empty()
150 || options
151 .include_paths
152 .iter()
153 .any(|pattern| path_matches(pattern, path));
154 let excluded = options
155 .exclude_paths
156 .iter()
157 .any(|pattern| path_matches(pattern, path));
158 included && !excluded
159}
160
/// Returns `true` when `path` is selected by `pattern`.
///
/// * A pattern that equals the path always matches.
/// * Otherwise each `[*]` in the pattern stands for exactly one array index
///   segment (`[`, one or more ASCII digits, `]`), and the text between
///   wildcards must appear literally.
///
/// The comparison is anchored at both ends: the whole path must be consumed by
/// the pattern. This keeps a filter such as `$.records[*].email` from also
/// selecting `$.records[0].email_backup` or `$.records[0].nested.email`.
fn path_matches(pattern: &str, path: &str) -> bool {
    if pattern == path {
        return true;
    }

    let mut literals = pattern.split("[*]");
    // The text before the first wildcard must be a prefix of the path. For a
    // pattern without any wildcard this leaves a non-empty remainder (equality
    // was ruled out above), so the final check rejects it.
    let Some(mut rest) = literals.next().and_then(|head| path.strip_prefix(head)) else {
        return false;
    };

    // Every further literal is preceded by one wildcard in the pattern.
    for literal in literals {
        let Some(after_index) = strip_index_segment(rest) else {
            return false;
        };
        let Some(after_literal) = after_index.strip_prefix(literal) else {
            return false;
        };
        rest = after_literal;
    }

    rest.is_empty()
}

/// Strips one leading `[<digits>]` segment from `path`, returning the remainder,
/// or `None` when `path` does not start with such a segment.
fn strip_index_segment(path: &str) -> Option<&str> {
    let after_open = path.strip_prefix('[')?;
    let after_digits = after_open.trim_start_matches(|c: char| c.is_ascii_digit());
    if after_digits.len() == after_open.len() {
        // No digits between the brackets: not an index segment.
        return None;
    }
    after_digits.strip_prefix(']')
}
180
#[cfg(test)]
mod tests {
    use super::*;
    use cloakrs_core::Locale;
    use cloakrs_patterns::default_registry;

    /// Document with the same `email` key under two different parent objects.
    const USER_AND_METADATA: &str =
        r#"{"user":{"email":"jane@example.com"},"metadata":{"email":"ops@example.com"}}"#;

    /// Builds a scanner from the default pattern registry with the US locale.
    fn scanner() -> Scanner {
        let builder = default_registry().into_scanner_builder();
        builder.locale(Locale::US).build().unwrap()
    }

    /// Scans `input` with a fresh scanner, panicking if the scan fails.
    fn run(input: &str, options: &JsonScanOptions) -> JsonScanResult {
        scan_json_str(input, &scanner(), options).unwrap()
    }

    #[test]
    fn test_scan_json_str_nested_object_detects_path() {
        let scanned = run(
            r#"{"user":{"email":"jane@example.com"}}"#,
            &JsonScanOptions::default(),
        );
        assert_eq!(scanned.strings[0].path, "$.user.email");
        assert_eq!(scanned.masked_json["user"]["email"], "[EMAIL]");
    }

    #[test]
    fn test_scan_json_str_arrays_use_indexed_paths() {
        let scanned = run(
            r#"{"records":[{"email":"jane@example.com"}]}"#,
            &JsonScanOptions::default(),
        );
        assert_eq!(scanned.strings[0].path, "$.records[0].email");
    }

    #[test]
    fn test_scan_json_str_include_paths_filters() {
        let only_user = JsonScanOptions {
            include_paths: vec![String::from("$.user.email")],
            exclude_paths: vec![],
        };
        let scanned = run(USER_AND_METADATA, &only_user);
        assert_eq!(scanned.strings.len(), 1);
        assert_eq!(scanned.strings[0].path, "$.user.email");
    }

    #[test]
    fn test_scan_json_str_exclude_paths_filters() {
        let without_metadata = JsonScanOptions {
            include_paths: vec![],
            exclude_paths: vec![String::from("$.metadata.email")],
        };
        let scanned = run(USER_AND_METADATA, &without_metadata);
        assert_eq!(scanned.strings.len(), 1);
        assert_eq!(scanned.strings[0].path, "$.user.email");
    }

    #[test]
    fn test_scan_json_str_wildcard_path_matches_array_items() {
        let every_record = JsonScanOptions {
            include_paths: vec![String::from("$.records[*].email")],
            exclude_paths: vec![],
        };
        let scanned = run(
            r#"{"records":[{"email":"jane@example.com"},{"email":"ops@example.com"}]}"#,
            &every_record,
        );
        assert_eq!(scanned.strings.len(), 2);
    }
}