Skip to main content

sanitize_engine/processor/
json_proc.rs

1//! JSON structured processor.
2//!
3//! Parses JSON input, walks the value tree, replaces values at matched
4//! key paths, and serializes back to JSON preserving structure.
5//!
6//! # Key Paths
7//!
8//! Nested keys are expressed as dot-separated paths:
9//! `database.password`, `smtp.credentials.user`.
10//!
11//! Array elements are traversed transparently — a rule for `users.email`
12//! matches the `email` field inside every object in the `users` array.
13
14use crate::error::{Result, SanitizeError};
15use crate::processor::{find_matching_rule, replace_value, FileTypeProfile, Processor};
16use crate::store::MappingStore;
17use serde_json::Value;
18
/// Deepest nesting level `walk_json` will descend to before giving up.
///
/// Bounding the recursion keeps deeply nested or malicious documents from
/// overflowing the stack (R-4 fix).
const MAX_JSON_DEPTH: usize = 128;

/// Largest input, in bytes, that the JSON processor accepts (F-04 fix).
///
/// Anything bigger is rejected up front, before any parsing is attempted.
const MAX_JSON_INPUT_SIZE: usize = 256 * (1 << 20); // 256 MiB

/// Structured processor that sanitizes JSON documents.
pub struct JsonProcessor;
29
30impl Processor for JsonProcessor {
31    fn name(&self) -> &'static str {
32        "json"
33    }
34
35    fn can_handle(&self, content: &[u8], profile: &FileTypeProfile) -> bool {
36        if profile.processor == "json" {
37            return true;
38        }
39        // Heuristic: starts with `{` or `[` after optional whitespace.
40        let trimmed = content.iter().copied().find(|b| !b.is_ascii_whitespace());
41        matches!(trimmed, Some(b'{' | b'['))
42    }
43
44    fn process(
45        &self,
46        content: &[u8],
47        profile: &FileTypeProfile,
48        store: &MappingStore,
49    ) -> Result<Vec<u8>> {
50        // F-04 fix: enforce input size limit.
51        if content.len() > MAX_JSON_INPUT_SIZE {
52            return Err(SanitizeError::InputTooLarge {
53                size: content.len(),
54                limit: MAX_JSON_INPUT_SIZE,
55            });
56        }
57
58        let text = std::str::from_utf8(content).map_err(|e| SanitizeError::ParseError {
59            format: "JSON".into(),
60            message: format!("invalid UTF-8: {}", e),
61        })?;
62
63        let mut value: Value =
64            serde_json::from_str(text).map_err(|e| SanitizeError::ParseError {
65                format: "JSON".into(),
66                message: format!("JSON parse error: {}", e),
67            })?;
68
69        walk_json(&mut value, "", profile, store, 0)?;
70
71        let compact = profile.options.get("compact").is_some_and(|v| v == "true");
72
73        let output = if compact {
74            serde_json::to_vec(&value)
75        } else {
76            serde_json::to_vec_pretty(&value)
77        }
78        .map_err(|e| SanitizeError::IoError(format!("JSON serialize error: {}", e)))?;
79
80        Ok(output)
81    }
82}
83
84/// Recursively walk a JSON value, replacing matched fields.
85///
86/// `depth` tracks the current recursion level; exceeding `MAX_JSON_DEPTH`
87/// returns an error instead of risking a stack overflow.
88fn walk_json(
89    value: &mut Value,
90    prefix: &str,
91    profile: &FileTypeProfile,
92    store: &MappingStore,
93    depth: usize,
94) -> Result<()> {
95    if depth > MAX_JSON_DEPTH {
96        return Err(SanitizeError::RecursionDepthExceeded(format!(
97            "JSON recursion depth exceeds limit of {MAX_JSON_DEPTH}"
98        )));
99    }
100    match value {
101        Value::Object(map) => {
102            let keys: Vec<String> = map.keys().cloned().collect();
103            for key in keys {
104                let path = if prefix.is_empty() {
105                    key.clone()
106                } else {
107                    format!("{}.{}", prefix, key)
108                };
109
110                if let Some(v) = map.get_mut(&key) {
111                    match v {
112                        Value::String(s) => {
113                            if let Some(rule) = find_matching_rule(&path, profile) {
114                                *s = replace_value(s, rule, store)?;
115                            }
116                        }
117                        Value::Number(_) | Value::Bool(_) => {
118                            if let Some(rule) = find_matching_rule(&path, profile) {
119                                let repr = v.to_string();
120                                let replaced = replace_value(&repr, rule, store)?;
121                                *v = Value::String(replaced);
122                            }
123                        }
124                        Value::Object(_) | Value::Array(_) => {
125                            walk_json(v, &path, profile, store, depth + 1)?;
126                        }
127                        Value::Null => {}
128                    }
129                }
130            }
131        }
132        Value::Array(arr) => {
133            for item in arr.iter_mut() {
134                walk_json(item, prefix, profile, store, depth + 1)?;
135            }
136        }
137        _ => {}
138    }
139    Ok(())
140}
141
#[cfg(test)]
mod tests {
    use super::*;
    use crate::category::Category;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store around an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Runs `JsonProcessor` over `input` with `profile` and parses the
    /// produced bytes back into a JSON value.
    fn run_compact(input: &[u8], profile: &FileTypeProfile) -> Value {
        let store = make_store();
        let bytes = JsonProcessor.process(input, profile, &store).unwrap();
        serde_json::from_slice(&bytes).unwrap()
    }

    #[test]
    fn basic_json_replacement() {
        let input =
            br#"{"database": {"host": "db.corp.com", "password": "s3cret"}, "port": 5432}"#;
        let rules = vec![
            FieldRule::new("database.password").with_category(Category::Custom("pw".into())),
            FieldRule::new("database.host").with_category(Category::Hostname),
        ];
        let profile = FileTypeProfile::new("json", rules).with_option("compact", "true");

        let out = run_compact(input, &profile);
        let database = &out["database"];

        // Matched fields change; the unmatched `port` stays as it was.
        assert_ne!(database["password"].as_str().unwrap(), "s3cret");
        assert_ne!(database["host"].as_str().unwrap(), "db.corp.com");
        assert_eq!(out["port"], 5432);
    }

    #[test]
    fn json_array_traversal() {
        let input = br#"{"users": [{"email": "a@b.com"}, {"email": "c@d.com"}]}"#;
        let rules = vec![FieldRule::new("users.email").with_category(Category::Email)];
        let profile = FileTypeProfile::new("json", rules).with_option("compact", "true");

        let out = run_compact(input, &profile);
        let users = out["users"].as_array().unwrap();

        // The rule applies to the `email` member of every array element.
        assert_ne!(users[0]["email"].as_str().unwrap(), "a@b.com");
        assert_ne!(users[1]["email"].as_str().unwrap(), "c@d.com");
    }

    #[test]
    fn json_glob_suffix_pattern() {
        let input =
            br#"{"db": {"password": "pw1"}, "cache": {"password": "pw2"}, "name": "app"}"#;
        let rules =
            vec![FieldRule::new("*.password").with_category(Category::Custom("pw".into()))];
        let profile = FileTypeProfile::new("json", rules).with_option("compact", "true");

        let out = run_compact(input, &profile);

        // Both nested passwords change; the top-level `name` stays as it was.
        assert_ne!(out["db"]["password"].as_str().unwrap(), "pw1");
        assert_ne!(out["cache"]["password"].as_str().unwrap(), "pw2");
        assert_eq!(out["name"], "app");
    }
}