sanitize_engine/processor/
json_proc.rs1use crate::error::{Result, SanitizeError};
15use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
16use crate::store::MappingStore;
17use serde_json::Value;
18
/// Deepest level of nesting that `walk_json` will descend into before it
/// gives up with `SanitizeError::RecursionDepthExceeded`.
const MAX_JSON_DEPTH: usize = 128;

/// Largest input, in bytes, that `JsonProcessor::process` accepts (256 MiB).
/// Anything longer is rejected with `SanitizeError::InputTooLarge` before
/// any decoding or parsing is attempted.
const MAX_JSON_INPUT_SIZE: usize = 256 * 1024 * 1024;

/// Processor for JSON documents.
///
/// The type carries no state; all behavior lives in its `Processor`
/// implementation.
pub struct JsonProcessor;
29
30impl Processor for JsonProcessor {
31 fn name(&self) -> &'static str {
32 "json"
33 }
34
35 fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
36 if profile.processor == "json" {
37 return true;
38 }
39 let trimmed = content.iter().copied().find(|b| !b.is_ascii_whitespace());
41 matches!(trimmed, Some(b'{' | b'['))
42 }
43
44 fn process(
45 &self,
46 content: &[u8],
47 profile: &FileTypeProfile,
48 store: &MappingStore,
49 ) -> Result<Vec<u8>> {
50 if content.len() > MAX_JSON_INPUT_SIZE {
52 return Err(SanitizeError::InputTooLarge {
53 size: content.len(),
54 limit: MAX_JSON_INPUT_SIZE,
55 });
56 }
57
58 let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
59 format: "JSON".into(),
60 message: format!("invalid UTF-8: {}", e),
61 })?;
62
63 let mut value: Value =
64 serde_json::from_str(text).map_err(|e| SanitizeError::ParseError {
65 format: "JSON".into(),
66 message: format!("JSON parse error: {}", e),
67 })?;
68
69 walk_json(&mut value, "", profile, store, 0)?;
70
71 let compact = profile.options.get("compact").is_some_and(|v| v == "true");
72
73 let output = if compact {
74 serde_json::to_vec(&value)
75 } else {
76 serde_json::to_vec_pretty(&value)
77 }
78 .map_err(|e| SanitizeError::IoError(format!("JSON serialize error: {}", e)))?;
79
80 Ok(output)
81 }
82}
83
84fn walk_json(
89 value: &mut Value,
90 prefix: &str,
91 profile: &FileTypeProfile,
92 store: &MappingStore,
93 depth: usize,
94) -> Result<()> {
95 if depth > MAX_JSON_DEPTH {
96 return Err(SanitizeError::RecursionDepthExceeded(format!(
97 "JSON recursion depth exceeds limit of {MAX_JSON_DEPTH}"
98 )));
99 }
100 match value {
101 Value::Object(map) => {
102 let keys: Vec<String> = map.keys().cloned().collect();
103 for key in keys {
104 let path = if prefix.is_empty() {
105 key.clone()
106 } else {
107 format!("{}.{}", prefix, key)
108 };
109
110 if let Some(v) = map.get_mut(&key) {
111 match v {
112 Value::String(s) => {
113 if let Some(rule) = find_matching_rule(&path, profile) {
114 *s = replace_value(s, rule, store)?;
115 }
116 }
117 Value::Number(_) | Value::Bool(_) => {
118 if let Some(rule) = find_matching_rule(&path, profile) {
119 let repr = v.to_string();
120 let replaced = replace_value(&repr, rule, store)?;
121 *v = Value::String(replaced);
122 }
123 }
124 Value::Object(_) | Value::Array(_) => {
125 walk_json(v, &path, profile, store, depth + 1)?;
126 }
127 Value::Null => {}
128 }
129 }
130 }
131 }
132 Value::Array(arr) => {
133 for item in arr.iter_mut() {
134 walk_json(item, prefix, profile, store, depth + 1)?;
135 }
136 }
137 _ => {}
138 }
139 Ok(())
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a `MappingStore` around an `HmacGenerator` constructed from a
    /// constant 32-byte array, so every test starts from the same setup.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs `JsonProcessor` over `content` with a compact `"json"` profile
    /// made of `rules`, and parses the produced bytes back into a `Value`.
    fn sanitize(content: &[u8], rules: Vec<FieldRule>) -> Value {
        let store = make_store();
        let profile = FileTypeProfile::new("json", rules).with_option("compact", "true");
        let output = JsonProcessor.process(content, &profile, &store).unwrap();
        serde_json::from_slice(&output).unwrap()
    }

    /// Fields named by exact dotted paths are replaced; other fields are kept.
    #[test]
    fn basic_json_replacement() {
        let out = sanitize(
            br#"{"database": {"host": "db.corp.com", "password": "s3cret"}, "port": 5432}"#,
            vec![
                FieldRule::new("database.password").with_category(Category::Custom("pw".into())),
                FieldRule::new("database.host").with_category(Category::Hostname),
            ],
        );

        let database = &out["database"];
        assert_ne!(database["password"].as_str().unwrap(), "s3cret");
        assert_ne!(database["host"].as_str().unwrap(), "db.corp.com");
        assert_eq!(out["port"], 5432);
    }

    /// Objects nested inside an array are matched by the array's own path.
    #[test]
    fn json_array_traversal() {
        let out = sanitize(
            br#"{"users": [{"email": "a@b.com"}, {"email": "c@d.com"}]}"#,
            vec![FieldRule::new("users.email").with_category(Category::Email)],
        );

        let users = out["users"].as_array().unwrap();
        assert_ne!(users[0]["email"].as_str().unwrap(), "a@b.com");
        assert_ne!(users[1]["email"].as_str().unwrap(), "c@d.com");
    }

    /// A `*.password` rule applies under several parents and spares others.
    #[test]
    fn json_glob_suffix_pattern() {
        let out = sanitize(
            br#"{"db": {"password": "pw1"}, "cache": {"password": "pw2"}, "name": "app"}"#,
            vec![FieldRule::new("*.password").with_category(Category::Custom("pw".into()))],
        );

        assert_ne!(out["db"]["password"].as_str().unwrap(), "pw1");
        assert_ne!(out["cache"]["password"].as_str().unwrap(), "pw2");
        assert_eq!(out["name"], "app");
    }
}