use cloakrs_core::{PiiEntity, Result, Scanner};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
/// Path filters deciding which JSON string values are handed to the scanner.
///
/// Paths use the notation this module generates while walking a document:
/// `$` for the root, `.key` for an object member and `[index]` for an array
/// element (for example `$.records[0].email`). A pattern may contain `[*]`
/// to stand for array positions.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct JsonScanOptions {
/// Patterns selecting the strings to scan. When empty, every path is eligible.
pub include_paths: Vec<String>,
/// Patterns whose matching strings are never scanned, even when also included.
pub exclude_paths: Vec<String>,
}
/// Scan outcome for a single JSON string value that produced findings.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct JsonStringScanResult {
/// Location of the string inside the document, e.g. `$.user.email`.
pub path: String,
/// Entities the scanner reported for this string.
pub findings: Vec<PiiEntity>,
/// Masked text returned by the scanner, or `None` when it supplied none.
pub masked_value: Option<String>,
}
/// Aggregate result of scanning a whole JSON document.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct JsonScanResult {
/// One entry per scanned string that had findings, in traversal order.
pub strings: Vec<JsonStringScanResult>,
/// Copy of the input document in which scanned strings are replaced by the
/// scanner's masked text whenever it provided one.
pub masked_json: Value,
}
/// Parses `input` as JSON and scans the string values it contains.
///
/// This is a convenience wrapper that deserializes the text into a
/// [`Value`] and delegates to [`scan_json_value`].
///
/// # Errors
///
/// Returns an error when `input` is not valid JSON or when scanning fails.
pub fn scan_json_str(
    input: &str,
    scanner: &Scanner,
    options: &JsonScanOptions,
) -> Result<JsonScanResult> {
    let document = serde_json::from_str::<Value>(input)?;
    scan_json_value(&document, scanner, options)
}
/// Scans an already-parsed JSON document.
///
/// The traversal starts at the root path `$`. The returned result holds the
/// masked copy of `value` together with every string that had findings.
///
/// # Errors
///
/// Propagates any error raised while scanning a string value.
pub fn scan_json_value(
    value: &Value,
    scanner: &Scanner,
    options: &JsonScanOptions,
) -> Result<JsonScanResult> {
    let mut collected = Vec::new();
    scan_value(value, "$", scanner, options, &mut collected).map(|masked_json| JsonScanResult {
        strings: collected,
        masked_json,
    })
}
fn scan_value(
value: &Value,
path: &str,
scanner: &Scanner,
options: &JsonScanOptions,
strings: &mut Vec<JsonStringScanResult>,
) -> Result<Value> {
match value {
Value::String(text) => scan_string(text, path, scanner, options, strings),
Value::Array(items) => {
let mut masked = Vec::with_capacity(items.len());
for (index, item) in items.iter().enumerate() {
masked.push(scan_value(
item,
&format!("{path}[{index}]"),
scanner,
options,
strings,
)?);
}
Ok(Value::Array(masked))
}
Value::Object(map) => {
let mut masked = Map::with_capacity(map.len());
for (key, item) in map {
masked.insert(
key.clone(),
scan_value(item, &format!("{path}.{key}"), scanner, options, strings)?,
);
}
Ok(Value::Object(masked))
}
_ => Ok(value.clone()),
}
}
/// Scans one string value and returns the JSON string to put in its place.
///
/// Strings whose `path` is filtered out by `options` are copied through
/// without being scanned. Otherwise the scanner runs on `text`; when it
/// reports at least one finding, an entry is appended to `strings`. The
/// returned value is the scanner's masked text if it supplied one, and the
/// original text otherwise.
fn scan_string(
    text: &str,
    path: &str,
    scanner: &Scanner,
    options: &JsonScanOptions,
    strings: &mut Vec<JsonStringScanResult>,
) -> Result<Value> {
    if !path_allowed(path, options) {
        return Ok(Value::String(text.to_owned()));
    }

    let report = scanner.scan(text)?;
    let replacement = report.masked_text.clone();
    // Decide the output text before `replacement` is moved into the record.
    let output = replacement.as_deref().unwrap_or(text).to_owned();

    if !report.findings.is_empty() {
        strings.push(JsonStringScanResult {
            path: path.to_owned(),
            findings: report.findings,
            masked_value: replacement,
        });
    }

    Ok(Value::String(output))
}
/// Reports whether the string at `path` should be scanned.
///
/// A path is allowed when it matches no exclude pattern and, if any include
/// patterns are configured, matches at least one of them.
fn path_allowed(path: &str, options: &JsonScanOptions) -> bool {
    let matches_any =
        |patterns: &[String]| patterns.iter().any(|pattern| path_matches(pattern, path));

    // Exclusion always wins, so it can be settled first.
    if matches_any(&options.exclude_paths) {
        return false;
    }

    options.include_paths.is_empty() || matches_any(&options.include_paths)
}
/// Reports whether `path` is selected by `pattern`.
///
/// A pattern matches when it is identical to the path, or when replacing
/// every `[*]` in the pattern with a concrete array index (`[0]`, `[17]`, …)
/// yields exactly the path. The comparison is anchored at both ends: the
/// literal text before, between and after the wildcards must line up with the
/// path precisely, so `$.records[*].email` matches `$.records[3].email` but
/// not `$.records.email`, `$.records[3].email_backup` or
/// `$.records[3].meta.email`.
fn path_matches(pattern: &str, path: &str) -> bool {
    if pattern == path {
        return true;
    }

    let mut literals = pattern.split("[*]");
    // The text before the first wildcard must be a prefix of the path.
    let Some(mut rest) = literals.next().and_then(|head| path.strip_prefix(head)) else {
        return false;
    };

    // Every remaining literal is preceded by one `[*]`, which has to consume
    // exactly one `[digits]` segment of the path.
    for literal in literals {
        let Some(after_open) = rest.strip_prefix('[') else {
            return false;
        };
        let digits = after_open
            .bytes()
            .take_while(u8::is_ascii_digit)
            .count();
        if digits == 0 {
            return false;
        }
        // `digits` counts ASCII bytes only, so the slice is on a char boundary.
        let Some(after_close) = after_open[digits..].strip_prefix(']') else {
            return false;
        };
        let Some(after_literal) = after_close.strip_prefix(literal) else {
            return false;
        };
        rest = after_literal;
    }

    // Anything left over means the path is longer than the pattern allows.
    // For a pattern without wildcards this is only reached when the path
    // differs from the pattern, in which case `rest` is never empty.
    rest.is_empty()
}
#[cfg(test)]
mod tests {
    use super::*;
    use cloakrs_core::Locale;
    use cloakrs_patterns::default_registry;

    /// Document with an e-mail address under both `user` and `metadata`.
    const USER_AND_METADATA: &str =
        r#"{"user":{"email":"jane@example.com"},"metadata":{"email":"ops@example.com"}}"#;

    /// Builds a scanner from the default pattern registry for the US locale.
    fn scanner() -> Scanner {
        let builder = default_registry().into_scanner_builder();
        builder.locale(Locale::US).build().unwrap()
    }

    /// Runs `scan_json_str` with a fresh scanner and unwraps the result.
    fn run(input: &str, options: &JsonScanOptions) -> JsonScanResult {
        scan_json_str(input, &scanner(), options).unwrap()
    }

    /// Options that include only `pattern`.
    fn including(pattern: &str) -> JsonScanOptions {
        JsonScanOptions {
            include_paths: vec![pattern.to_string()],
            exclude_paths: Vec::new(),
        }
    }

    /// Options that exclude only `pattern`.
    fn excluding(pattern: &str) -> JsonScanOptions {
        JsonScanOptions {
            include_paths: Vec::new(),
            exclude_paths: vec![pattern.to_string()],
        }
    }

    #[test]
    fn test_scan_json_str_nested_object_detects_path() {
        let result = run(
            r#"{"user":{"email":"jane@example.com"}}"#,
            &JsonScanOptions::default(),
        );

        assert_eq!(result.strings[0].path, "$.user.email");
        assert_eq!(result.masked_json["user"]["email"], "[EMAIL]");
    }

    #[test]
    fn test_scan_json_str_arrays_use_indexed_paths() {
        let result = run(
            r#"{"records":[{"email":"jane@example.com"}]}"#,
            &JsonScanOptions::default(),
        );

        assert_eq!(result.strings[0].path, "$.records[0].email");
    }

    #[test]
    fn test_scan_json_str_include_paths_filters() {
        let result = run(USER_AND_METADATA, &including("$.user.email"));

        assert_eq!(result.strings.len(), 1);
        assert_eq!(result.strings[0].path, "$.user.email");
    }

    #[test]
    fn test_scan_json_str_exclude_paths_filters() {
        let result = run(USER_AND_METADATA, &excluding("$.metadata.email"));

        assert_eq!(result.strings.len(), 1);
        assert_eq!(result.strings[0].path, "$.user.email");
    }

    #[test]
    fn test_scan_json_str_wildcard_path_matches_array_items() {
        let result = run(
            r#"{"records":[{"email":"jane@example.com"},{"email":"ops@example.com"}]}"#,
            &including("$.records[*].email"),
        );

        assert_eq!(result.strings.len(), 2);
    }
}