Skip to main content

sanitize_engine/processor/
json_proc.rs

1//! JSON structured processor.
2//!
3//! Parses JSON input, walks the value tree, replaces values at matched
4//! key paths, and serializes back to JSON preserving structure.
5//!
6//! # Key Paths
7//!
8//! Nested keys are expressed as dot-separated paths:
9//! `database.password`, `smtp.credentials.user`.
10//!
11//! Array elements are traversed transparently — a rule for `users.email`
12//! matches the `email` field inside every object in the `users` array.
13
14use crate::error::{Result, SanitizeError};
15use crate::processor::limits::DEFAULT_INPUT_SIZE;
16use crate::processor::{walk_tree, FileTypeProfile, Processor, TreeNode};
17use crate::store::MappingStore;
18use serde_json::Value;
19
/// Structured processor for JSON files.
///
/// This is a stateless unit type: all per-run configuration is passed to the
/// [`Processor`] methods as arguments. The derived traits make the type easy
/// to log, copy, and construct through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct JsonProcessor;
22
23impl Processor for JsonProcessor {
24    fn name(&self) -> &'static str {
25        "json"
26    }
27
28    fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
29        if profile.processor == "json" {
30            return true;
31        }
32        // Heuristic: starts with `{` or `[` after optional whitespace.
33        let trimmed = content.iter().copied().find(|b| !b.is_ascii_whitespace());
34        matches!(trimmed, Some(b'{' | b'['))
35    }
36
37    fn process(
38        &self,
39        content: &[u8],
40        profile: &FileTypeProfile,
41        store: &MappingStore,
42    ) -> Result<Vec<u8>> {
43        // F-04 fix: enforce input size limit.
44        if content.len() > DEFAULT_INPUT_SIZE {
45            return Err(SanitizeError::InputTooLarge {
46                size: content.len(),
47                limit: DEFAULT_INPUT_SIZE,
48            });
49        }
50
51        let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
52            format: "JSON".into(),
53            message: format!("invalid UTF-8: {}", e),
54        })?;
55
56        let mut value: Value =
57            serde_json::from_str(text).map_err(|e| SanitizeError::ParseError {
58                format: "JSON".into(),
59                message: format!("JSON parse error: {}", e),
60            })?;
61
62        walk_json(&mut value, "", profile, store, 0)?;
63
64        let compact = profile.options.get("compact").is_some_and(|v| v == "true");
65
66        let output = if compact {
67            serde_json::to_vec(&value)
68        } else {
69            serde_json::to_vec_pretty(&value)
70        }
71        .map_err(|e| {
72            SanitizeError::IoError(std::io::Error::other(format!("JSON serialize error: {e}")))
73        })?;
74
75        Ok(output)
76    }
77}
78
79impl TreeNode for Value {
80    fn for_each_map_entry<F>(&mut self, mut f: F) -> Result<()>
81    where
82        F: FnMut(&str, &mut Self) -> Result<()>,
83    {
84        if let Self::Object(map) = self {
85            let keys: Vec<String> = map.keys().cloned().collect();
86            for key in keys {
87                if let Some(v) = map.get_mut(&key) {
88                    f(&key, v)?;
89                }
90            }
91        }
92        Ok(())
93    }
94
95    fn for_each_seq_item<F>(&mut self, mut f: F) -> Result<()>
96    where
97        F: FnMut(&mut Self) -> Result<()>,
98    {
99        if let Self::Array(arr) = self {
100            for item in arr.iter_mut() {
101                f(item)?;
102            }
103        }
104        Ok(())
105    }
106
107    fn as_str_mut(&mut self) -> Option<&mut String> {
108        if let Self::String(s) = self {
109            Some(s)
110        } else {
111            None
112        }
113    }
114
115    fn is_scalar(&self) -> bool {
116        matches!(self, Self::Number(_) | Self::Bool(_))
117    }
118
119    fn scalar_to_string(&self) -> String {
120        self.to_string()
121    }
122
123    fn set_string(&mut self, s: String) {
124        *self = Self::String(s);
125    }
126}
127
128/// Recursively walk a JSON value tree, replacing matched field values.
129pub(crate) fn walk_json(
130    value: &mut Value,
131    prefix: &str,
132    profile: &FileTypeProfile,
133    store: &MappingStore,
134    depth: usize,
135) -> Result<()> {
136    walk_tree(value, prefix, profile, store, depth, "JSON")
137}
138
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        // Not named `gen`: that identifier is a reserved keyword starting
        // with the Rust 2024 edition.
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Values at explicitly listed dotted key paths are replaced, while
    /// fields without a rule are left untouched.
    #[test]
    fn basic_json_replacement() {
        let store = make_store();
        let proc = JsonProcessor;

        let content =
            br#"{"database": {"host": "db.corp.com", "password": "s3cret"}, "port": 5432}"#;
        let profile = FileTypeProfile::new(
            "json",
            vec![
                FieldRule::new("database.password").with_category(Category::Custom("pw".into())),
                FieldRule::new("database.host").with_category(Category::Hostname),
            ],
        )
        .with_option("compact", "true");

        let result = proc.process(content, &profile, &store).unwrap();
        let out: Value = serde_json::from_slice(&result).unwrap();

        assert_ne!(out["database"]["password"].as_str().unwrap(), "s3cret");
        assert_ne!(out["database"]["host"].as_str().unwrap(), "db.corp.com");
        // No rule targets `port`, so it must survive unchanged.
        assert_eq!(out["port"], 5432);
    }

    /// A rule for `users.email` applies to the `email` field of every object
    /// inside the `users` array.
    #[test]
    fn json_array_traversal() {
        let store = make_store();
        let proc = JsonProcessor;

        let content = br#"{"users": [{"email": "a@b.com"}, {"email": "c@d.com"}]}"#;
        let profile = FileTypeProfile::new(
            "json",
            vec![FieldRule::new("users.email").with_category(Category::Email)],
        )
        .with_option("compact", "true");

        let result = proc.process(content, &profile, &store).unwrap();
        let out: Value = serde_json::from_slice(&result).unwrap();

        let users = out["users"].as_array().unwrap();
        assert_ne!(users[0]["email"].as_str().unwrap(), "a@b.com");
        assert_ne!(users[1]["email"].as_str().unwrap(), "c@d.com");
    }

    /// A `*.password` rule matches `password` under different parent keys
    /// and leaves unrelated top-level fields alone.
    #[test]
    fn json_glob_suffix_pattern() {
        let store = make_store();
        let proc = JsonProcessor;

        let content =
            br#"{"db": {"password": "pw1"}, "cache": {"password": "pw2"}, "name": "app"}"#;
        let profile = FileTypeProfile::new(
            "json",
            vec![FieldRule::new("*.password").with_category(Category::Custom("pw".into()))],
        )
        .with_option("compact", "true");

        let result = proc.process(content, &profile, &store).unwrap();
        let out: Value = serde_json::from_slice(&result).unwrap();

        assert_ne!(out["db"]["password"].as_str().unwrap(), "pw1");
        assert_ne!(out["cache"]["password"].as_str().unwrap(), "pw2");
        assert_eq!(out["name"], "app");
    }
}
216}