Skip to main content

cloakrs_adapters/
json.rs

//! JSON adapter for scanning string values with path metadata.
2
3use cloakrs_core::{PiiEntity, Result, Scanner};
4use serde::{Deserialize, Serialize};
5use serde_json::{Map, Value};
6
7/// Options controlling which JSON paths are scanned.
8#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
9pub struct JsonScanOptions {
10    /// JSONPath-like paths to include. Empty means include every string.
11    pub include_paths: Vec<String>,
12    /// JSONPath-like paths to skip.
13    pub exclude_paths: Vec<String>,
14}
15
16/// PII findings from one JSON string value.
17#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
18pub struct JsonStringScanResult {
19    /// JSON path where the string was found.
20    pub path: String,
21    /// Findings detected in this string.
22    pub findings: Vec<PiiEntity>,
23    /// Masked value when scanner masking is enabled.
24    pub masked_value: Option<String>,
25}
26
27/// Result of scanning a JSON document.
28#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
29pub struct JsonScanResult {
30    /// Findings grouped by JSON string path.
31    pub strings: Vec<JsonStringScanResult>,
32    /// JSON value with masked strings in place.
33    pub masked_json: Value,
34}
35
36/// Scans JSON text and returns path-aware findings.
37///
38/// # Examples
39///
40/// ```
41/// use cloakrs_adapters::{scan_json_str, JsonScanOptions};
42/// use cloakrs_core::{Confidence, EntityType, Locale, PiiEntity, Recognizer, Scanner, Span};
43///
44/// struct Email;
45/// impl Recognizer for Email {
46///     fn id(&self) -> &str { "email_test" }
47///     fn entity_type(&self) -> EntityType { EntityType::Email }
48///     fn supported_locales(&self) -> &[Locale] { &[] }
49///     fn scan(&self, text: &str) -> Vec<PiiEntity> {
50///         text.find('@').map(|at| PiiEntity {
51///             entity_type: EntityType::Email,
52///             span: Span::new(0, at + 4),
53///             text: text[..at + 4].to_string(),
54///             confidence: Confidence::new(0.9).unwrap(),
55///             recognizer_id: self.id().to_string(),
56///         }).into_iter().collect()
57///     }
58/// }
59///
60/// let scanner = Scanner::builder().recognizer(Email).build().unwrap();
61/// let result = scan_json_str(r#"{"email":"a@b.test"}"#, &scanner, &JsonScanOptions::default()).unwrap();
62/// assert_eq!(result.strings[0].path, "$.email");
63/// ```
64pub fn scan_json_str(
65    input: &str,
66    scanner: &Scanner,
67    options: &JsonScanOptions,
68) -> Result<JsonScanResult> {
69    let value: Value = serde_json::from_str(input)?;
70    scan_json_value(&value, scanner, options)
71}
72
73/// Scans an already parsed JSON value.
74pub fn scan_json_value(
75    value: &Value,
76    scanner: &Scanner,
77    options: &JsonScanOptions,
78) -> Result<JsonScanResult> {
79    let mut strings = Vec::new();
80    let masked_json = scan_value(value, "$", scanner, options, &mut strings)?;
81    Ok(JsonScanResult {
82        strings,
83        masked_json,
84    })
85}
86
87fn scan_value(
88    value: &Value,
89    path: &str,
90    scanner: &Scanner,
91    options: &JsonScanOptions,
92    strings: &mut Vec<JsonStringScanResult>,
93) -> Result<Value> {
94    match value {
95        Value::String(text) => scan_string(text, path, scanner, options, strings),
96        Value::Array(items) => {
97            let mut masked = Vec::with_capacity(items.len());
98            for (index, item) in items.iter().enumerate() {
99                masked.push(scan_value(
100                    item,
101                    &format!("{path}[{index}]"),
102                    scanner,
103                    options,
104                    strings,
105                )?);
106            }
107            Ok(Value::Array(masked))
108        }
109        Value::Object(map) => {
110            let mut masked = Map::with_capacity(map.len());
111            for (key, item) in map {
112                masked.insert(
113                    key.clone(),
114                    scan_value(item, &format!("{path}.{key}"), scanner, options, strings)?,
115                );
116            }
117            Ok(Value::Object(masked))
118        }
119        _ => Ok(value.clone()),
120    }
121}
122
123fn scan_string(
124    text: &str,
125    path: &str,
126    scanner: &Scanner,
127    options: &JsonScanOptions,
128    strings: &mut Vec<JsonStringScanResult>,
129) -> Result<Value> {
130    if !path_allowed(path, options) {
131        return Ok(Value::String(text.to_string()));
132    }
133
134    let scan = scanner.scan(text)?;
135    let masked_value = scan.masked_text.clone();
136    if !scan.findings.is_empty() {
137        strings.push(JsonStringScanResult {
138            path: path.to_string(),
139            findings: scan.findings,
140            masked_value: masked_value.clone(),
141        });
142    }
143    Ok(Value::String(
144        masked_value.unwrap_or_else(|| text.to_string()),
145    ))
146}
147
148fn path_allowed(path: &str, options: &JsonScanOptions) -> bool {
149    let included = options.include_paths.is_empty()
150        || options
151            .include_paths
152            .iter()
153            .any(|pattern| path_matches(pattern, path));
154    let excluded = options
155        .exclude_paths
156        .iter()
157        .any(|pattern| path_matches(pattern, path));
158    included && !excluded
159}
160
/// Returns `true` when `path` matches the JSONPath-like `pattern`.
///
/// A pattern without `[*]` matches only the identical path. Each `[*]` in the
/// pattern stands for exactly one array index segment (`[0]`, `[17]`, ...),
/// and the literal text around the wildcards must line up with the path from
/// its first character to its last. For example `$.records[*].email` matches
/// `$.records[3].email` but not `$.records[3].email_backup`,
/// `$.records_old[3].email` or `$.records.email`.
fn path_matches(pattern: &str, path: &str) -> bool {
    let mut literals = pattern.split("[*]");

    // `split` always yields at least one piece; the first one anchors the
    // match at the start of the path.
    let Some(mut rest) = literals.next().and_then(|head| path.strip_prefix(head)) else {
        return false;
    };

    // Every further piece is preceded by one `[*]`, so consume one concrete
    // index segment and then the literal text that follows it.
    for literal in literals {
        let Some(after_literal) =
            strip_array_index(rest).and_then(|tail| tail.strip_prefix(literal))
        else {
            return false;
        };
        rest = after_literal;
    }

    // Anchor the match at the end: leftover path text means a different path.
    rest.is_empty()
}

/// Strips one leading `[digits]` segment from `path`, returning the remainder.
///
/// Returns `None` when `path` does not start with a bracketed, non-empty run
/// of ASCII digits.
fn strip_array_index(path: &str) -> Option<&str> {
    let after_open = path.strip_prefix('[')?;
    let digit_count = after_open.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    // ASCII digits are single bytes, so `digit_count` is a valid char boundary.
    after_open[digit_count..].strip_prefix(']')
}
180
#[cfg(test)]
mod tests {
    use super::*;
    use cloakrs_core::Locale;
    use cloakrs_patterns::default_registry;

    /// Input with an email under both `user` and `metadata`, used by the filter tests.
    const USER_AND_METADATA: &str =
        r#"{"user":{"email":"jane@example.com"},"metadata":{"email":"ops@example.com"}}"#;

    /// Builds a scanner from the default recognizer registry with the US locale.
    fn scanner() -> Scanner {
        let builder = default_registry().into_scanner_builder().locale(Locale::US);
        builder.build().unwrap()
    }

    /// Builds scan options from borrowed include and exclude patterns.
    fn options(include: &[&str], exclude: &[&str]) -> JsonScanOptions {
        let owned = |patterns: &[&str]| patterns.iter().map(|p| p.to_string()).collect();
        JsonScanOptions {
            include_paths: owned(include),
            exclude_paths: owned(exclude),
        }
    }

    /// Scans `input` with a fresh scanner, panicking if the scan fails.
    fn scan(input: &str, options: &JsonScanOptions) -> JsonScanResult {
        scan_json_str(input, &scanner(), options).unwrap()
    }

    #[test]
    fn test_scan_json_str_nested_object_detects_path() {
        let result = scan(
            r#"{"user":{"email":"jane@example.com"}}"#,
            &JsonScanOptions::default(),
        );
        assert_eq!(result.strings[0].path, "$.user.email");
        assert_eq!(result.masked_json["user"]["email"], "[EMAIL]");
    }

    #[test]
    fn test_scan_json_str_arrays_use_indexed_paths() {
        let result = scan(
            r#"{"records":[{"email":"jane@example.com"}]}"#,
            &JsonScanOptions::default(),
        );
        assert_eq!(result.strings[0].path, "$.records[0].email");
    }

    #[test]
    fn test_scan_json_str_include_paths_filters() {
        let result = scan(USER_AND_METADATA, &options(&["$.user.email"], &[]));
        assert_eq!(result.strings.len(), 1);
        assert_eq!(result.strings[0].path, "$.user.email");
    }

    #[test]
    fn test_scan_json_str_exclude_paths_filters() {
        let result = scan(USER_AND_METADATA, &options(&[], &["$.metadata.email"]));
        assert_eq!(result.strings.len(), 1);
        assert_eq!(result.strings[0].path, "$.user.email");
    }

    #[test]
    fn test_scan_json_str_wildcard_path_matches_array_items() {
        let input = r#"{"records":[{"email":"jane@example.com"},{"email":"ops@example.com"}]}"#;
        let result = scan(input, &options(&["$.records[*].email"], &[]));
        assert_eq!(result.strings.len(), 2);
    }
}
246}