Skip to main content

arbiter_audit/
redaction.rs

1//! Argument redaction for sensitive fields.
2//!
3//! Walks a JSON value tree and replaces any object key matching a configured
4//! pattern with `"[REDACTED]"`. Patterns use case-insensitive word-boundary
5//! matching (letters only) compiled to regexes, so `key` matches `api_key`
6//! but not `monkey` or `keyboard`.
7
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10
/// Placeholder string stored in place of any value whose object key matched
/// a redaction pattern.
pub const REDACTED: &str = "[REDACTED]";
13
/// Configuration for argument redaction.
///
/// Holds the raw, uncompiled key patterns. Use [`RedactionConfig::compile`]
/// to turn them into regexes that can be reused across requests.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RedactionConfig {
    /// Case-insensitive patterns matched against JSON object keys using
    /// letter-boundary matching. An occurrence of a pattern counts as a match
    /// only when neither the character immediately before it nor the character
    /// immediately after it is an ASCII letter (underscores, hyphens, digits,
    /// and the start/end of the key all act as separators).
    pub patterns: Vec<String>,
}
23
24impl Default for RedactionConfig {
25    fn default() -> Self {
26        // Expanded default redaction patterns to cover common variants.
27        Self {
28            patterns: vec![
29                "password".into(),
30                "passwd".into(),
31                "pwd".into(),
32                "token".into(),
33                "access_token".into(),
34                "refresh_token".into(),
35                "secret".into(),
36                "client_secret".into(),
37                "key".into(),
38                "api_key".into(),
39                "apikey".into(),
40                "api-key".into(),
41                "authorization".into(),
42                "auth".into(),
43                "credential".into(),
44                "cred".into(),
45                "private".into(),
46                "private_key".into(),
47                "ssn".into(),
48                "social_security".into(),
49                "credit_card".into(),
50                "card_number".into(),
51                "cvv".into(),
52                "cvc".into(),
53            ],
54        }
55    }
56}
57
/// Pre-compiled redaction patterns for efficient per-request redaction.
///
/// Compile once at startup via [`RedactionConfig::compile`] and reuse across
/// requests. The convenience wrapper [`redact_arguments`] compiles the
/// configuration on every call, which adds avoidable CPU overhead under load;
/// holding on to a `CompiledRedaction` avoids that cost.
#[derive(Debug, Clone)]
pub struct CompiledRedaction {
    /// One case-insensitive regex per configured pattern that compiled
    /// successfully.
    patterns: Vec<Regex>,
}
67
68impl CompiledRedaction {
69    /// Redact sensitive fields using pre-compiled patterns.
70    pub fn redact(&self, value: &serde_json::Value) -> serde_json::Value {
71        redact_value(value, &self.patterns)
72    }
73}
74
75impl RedactionConfig {
76    /// Pre-compile patterns into regexes for reuse across requests.
77    pub fn compile(&self) -> CompiledRedaction {
78        let patterns = self
79            .patterns
80            .iter()
81            .filter_map(|p| Regex::new(&format!("(?i){}", regex::escape(p))).ok())
82            .collect();
83        CompiledRedaction { patterns }
84    }
85}
86
87/// Redact sensitive fields in a JSON value based on the given configuration.
88///
89/// Object keys matching any pattern (case-insensitive, letter-boundary) have
90/// their values replaced with `"[REDACTED]"`. The walk is recursive through
91/// objects and arrays.
92///
93/// For hot-path usage, prefer [`RedactionConfig::compile`] + [`CompiledRedaction::redact`]
94/// to avoid recompiling regexes on every call.
95pub fn redact_arguments(value: &serde_json::Value, config: &RedactionConfig) -> serde_json::Value {
96    config.compile().redact(value)
97}
98
99/// Check whether a pattern match has letter-boundaries: the characters
100/// immediately before and after the match must NOT be ASCII letters.
101/// This prevents `key` from matching inside `monkey` or `keyboard`,
102/// while still matching `api_key`, `api-key`, or standalone `key`.
103fn has_letter_boundary_match(key: &str, pattern: &Regex) -> bool {
104    for m in pattern.find_iter(key) {
105        let before = key[..m.start()].chars().next_back();
106        let after = key[m.end()..].chars().next();
107        let preceded_by_letter = before.is_some_and(|c| c.is_ascii_alphabetic());
108        let followed_by_letter = after.is_some_and(|c| c.is_ascii_alphabetic());
109        if !preceded_by_letter && !followed_by_letter {
110            return true;
111        }
112    }
113    false
114}
115
/// Maximum recursion depth for redaction to prevent stack overflow
/// from adversarially deep JSON structures. Any value reached at this depth
/// is replaced by a truncation marker string instead of being walked.
const MAX_REDACTION_DEPTH: usize = 64;
119
120fn redact_value(value: &serde_json::Value, patterns: &[Regex]) -> serde_json::Value {
121    redact_value_depth(value, patterns, 0)
122}
123
124fn redact_value_depth(
125    value: &serde_json::Value,
126    patterns: &[Regex],
127    depth: usize,
128) -> serde_json::Value {
129    if depth >= MAX_REDACTION_DEPTH {
130        // Truncate at max depth to prevent stack overflow.
131        return serde_json::Value::String("[TRUNCATED: max redaction depth]".into());
132    }
133    match value {
134        serde_json::Value::Object(map) => {
135            let mut redacted = serde_json::Map::new();
136            for (k, v) in map {
137                if patterns.iter().any(|p| has_letter_boundary_match(k, p)) {
138                    redacted.insert(k.clone(), serde_json::Value::String(REDACTED.into()));
139                } else {
140                    redacted.insert(k.clone(), redact_value_depth(v, patterns, depth + 1));
141                }
142            }
143            serde_json::Value::Object(redacted)
144        }
145        serde_json::Value::Array(arr) => serde_json::Value::Array(
146            arr.iter()
147                .map(|v| redact_value_depth(v, patterns, depth + 1))
148                .collect(),
149        ),
150        other => other.clone(),
151    }
152}
153
#[cfg(test)]
mod tests {
    //! Unit tests for key-based JSON redaction: basic matching, case
    //! insensitivity, letter-boundary semantics, nesting/depth handling, and
    //! the pre-compiled [`CompiledRedaction`] path.

    use super::*;
    use serde_json::json;

    #[test]
    fn redacts_sensitive_fields() {
        let config = RedactionConfig::default();
        let input = json!({
            "path": "/etc/hosts",
            "api_key": "sk-12345",
            "password": "hunter2",
            "nested": {
                "access_token": "abc",
                "count": 42
            }
        });

        let redacted = redact_arguments(&input, &config);

        assert_eq!(redacted["path"], "/etc/hosts");
        assert_eq!(redacted["api_key"], REDACTED);
        assert_eq!(redacted["password"], REDACTED);
        assert_eq!(redacted["nested"]["access_token"], REDACTED);
        assert_eq!(redacted["nested"]["count"], 42);
    }

    #[test]
    fn redaction_is_case_insensitive() {
        let config = RedactionConfig {
            patterns: vec!["secret".into()],
        };
        let input = json!({
            "SECRET_VALUE": "classified",
            "my_Secret": "also classified",
            "public": "visible"
        });

        let redacted = redact_arguments(&input, &config);

        assert_eq!(redacted["SECRET_VALUE"], REDACTED);
        assert_eq!(redacted["my_Secret"], REDACTED);
        assert_eq!(redacted["public"], "visible");
    }

    #[test]
    fn redacts_inside_arrays() {
        let config = RedactionConfig {
            patterns: vec!["token".into()],
        };
        let input = json!([
            {"token": "abc", "id": 1},
            {"token": "def", "id": 2}
        ]);

        let redacted = redact_arguments(&input, &config);
        let arr = redacted.as_array().unwrap();

        assert_eq!(arr[0]["token"], REDACTED);
        assert_eq!(arr[0]["id"], 1);
        assert_eq!(arr[1]["token"], REDACTED);
    }

    #[test]
    fn empty_patterns_redact_nothing() {
        let config = RedactionConfig { patterns: vec![] };
        let input = json!({"password": "hunter2", "secret": "x"});
        let redacted = redact_arguments(&input, &config);

        assert_eq!(redacted["password"], "hunter2");
        assert_eq!(redacted["secret"], "x");
    }

    #[test]
    fn scalar_values_pass_through() {
        let config = RedactionConfig::default();
        let input = json!("just a string");
        assert_eq!(redact_arguments(&input, &config), json!("just a string"));

        let input = json!(42);
        assert_eq!(redact_arguments(&input, &config), json!(42));
    }

    // -----------------------------------------------------------------------
    // Letter-boundary matching (guards against substring over-matching)
    // -----------------------------------------------------------------------

    /// Redaction uses word-boundary matching (letter-boundary): pattern "key"
    /// matches "api_key" and "key_id" (separated by underscore) but NOT
    /// "monkey" or "keyboard" (embedded in other letters).
    #[test]
    fn redaction_is_word_boundary_match() {
        let config = RedactionConfig {
            patterns: vec!["key".into()],
        };
        let input = json!({
            "api_key": "secret-1",
            "key_id": "secret-2",
            "monkey": "banana",
            "keyboard": "qwerty",
            "unrelated": "visible"
        });

        let redacted = redact_arguments(&input, &config);

        // Fields where "key" appears at a word boundary are redacted.
        assert_eq!(
            redacted["api_key"], REDACTED,
            "api_key has 'key' at boundary"
        );
        assert_eq!(redacted["key_id"], REDACTED, "key_id has 'key' at boundary");

        // Fields where "key" is embedded in other letters are NOT redacted.
        assert_eq!(
            redacted["monkey"], "banana",
            "monkey should not be redacted"
        );
        assert_eq!(
            redacted["keyboard"], "qwerty",
            "keyboard should not be redacted"
        );

        // Fields without "key" are left alone.
        assert_eq!(redacted["unrelated"], "visible");
    }

    /// Pattern "token" does NOT match "tokelau_island" or "notation", because
    /// "token" is not a substring of either, and does NOT match "tokenizer",
    /// because there the occurrence is immediately followed by a letter.
    /// It DOES match "access_token" and "token_type", where the occurrence is
    /// delimited by an underscore or the start/end of the key.
    #[test]
    fn redaction_does_not_match_unrelated() {
        let config = RedactionConfig {
            patterns: vec!["token".into()],
        };
        let input = json!({
            "access_token": "secret",
            "token_type": "bearer",
            "tokenizer": "bpe",
            "tokelau_island": "pacific",
            "notation": "musical"
        });

        let redacted = redact_arguments(&input, &config);

        // "access_token" and "token_type" contain "token" at a boundary -> redacted.
        assert_eq!(redacted["access_token"], REDACTED);
        assert_eq!(redacted["token_type"], REDACTED);

        // "tokenizer" contains "token" followed by a letter -> NOT redacted.
        assert_eq!(redacted["tokenizer"], "bpe");

        // "tokelau_island" does NOT contain "token" -> NOT redacted.
        assert_eq!(redacted["tokelau_island"], "pacific");

        // "notation" does NOT contain "token" -> NOT redacted.
        assert_eq!(redacted["notation"], "musical");
    }

    // -----------------------------------------------------------------------
    // Deeply nested JSON redaction (no stack overflow)
    // -----------------------------------------------------------------------

    #[test]
    fn deeply_nested_json_redaction() {
        let config = RedactionConfig {
            patterns: vec!["secret".into()],
        };

        // Build 10 levels of nesting: {"level": {"level": ... {"secret": "value"}}}
        let mut value = json!({"secret": "deep-secret-value", "visible": "ok"});
        for _ in 0..10 {
            value = json!({"level": value});
        }

        let redacted = redact_arguments(&value, &config);

        // Walk down 10 levels to verify the deeply nested "secret" was redacted.
        let mut current = &redacted;
        for _ in 0..10 {
            current = &current["level"];
        }
        assert_eq!(
            current["secret"], REDACTED,
            "deeply nested 'secret' field must be redacted"
        );
        assert_eq!(
            current["visible"], "ok",
            "non-secret field at depth must be preserved"
        );
    }

    /// Values nested at or beyond `MAX_REDACTION_DEPTH` are replaced with the
    /// truncation marker rather than walked (or copied through unredacted).
    #[test]
    fn nesting_beyond_max_depth_is_truncated() {
        let config = RedactionConfig {
            patterns: vec!["secret".into()],
        };

        // Nest a few levels past the limit so the cut-off is actually reached.
        let mut value = json!({"secret": "too-deep"});
        for _ in 0..(MAX_REDACTION_DEPTH + 5) {
            value = json!({"level": value});
        }

        let redacted = redact_arguments(&value, &config);

        // The value reached after MAX_REDACTION_DEPTH hops sits exactly at the
        // depth limit and must have been replaced by the marker.
        let mut current = &redacted;
        for _ in 0..MAX_REDACTION_DEPTH {
            current = &current["level"];
        }
        assert_eq!(
            *current,
            json!("[TRUNCATED: max redaction depth]"),
            "value at the depth limit must be truncated"
        );
    }

    #[test]
    fn does_not_redact_non_sensitive_substrings() {
        let config = RedactionConfig::default();
        let input = json!({
            "keyboard": "mechanical",
            "monkey": "curious george",
            "author": "Jane Doe",
            "authenticate_method": "oauth2"
        });
        let redacted = redact_arguments(&input, &config);
        assert_eq!(
            redacted["keyboard"], "mechanical",
            "keyboard should not be redacted"
        );
        assert_eq!(
            redacted["monkey"], "curious george",
            "monkey should not be redacted"
        );
        assert_eq!(
            redacted["author"], "Jane Doe",
            "author should not be redacted"
        );
        assert_eq!(
            redacted["authenticate_method"], "oauth2",
            "authenticate_method should not be redacted"
        );
    }

    #[test]
    fn still_redacts_sensitive_compound_fields() {
        let config = RedactionConfig::default();
        let input = json!({
            "api_key": "sk-12345",
            "api-key": "sk-67890",
            "x-auth-token": "bearer-abc",
            "user_password": "hunter2"
        });
        let redacted = redact_arguments(&input, &config);
        assert_eq!(redacted["api_key"], REDACTED);
        assert_eq!(redacted["api-key"], REDACTED);
        assert_eq!(redacted["x-auth-token"], REDACTED);
        assert_eq!(redacted["user_password"], REDACTED);
    }

    // ── RT-206: CompiledRedaction direct tests ────────────────────────

    #[test]
    fn compiled_redaction_matches_redact_arguments() {
        let config = RedactionConfig::default();
        let compiled = config.compile();
        let input = json!({
            "path": "/etc/hosts",
            "api_key": "sk-12345",
            "password": "hunter2",
            "nested": {
                "access_token": "abc",
                "count": 42
            }
        });

        let result_compiled = compiled.redact(&input);
        let result_wrapper = redact_arguments(&input, &config);
        assert_eq!(
            result_compiled, result_wrapper,
            "compiled and wrapper should produce identical output"
        );
    }

    #[test]
    fn compiled_redaction_reusable_across_calls() {
        let config = RedactionConfig {
            patterns: vec!["secret".into(), "key".into()],
        };
        let compiled = config.compile();

        let input1 = json!({"secret": "val1", "public": "ok"});
        let input2 = json!({"api_key": "val2", "name": "test"});

        let r1 = compiled.redact(&input1);
        let r2 = compiled.redact(&input2);

        assert_eq!(r1["secret"], REDACTED);
        assert_eq!(r1["public"], "ok");
        assert_eq!(r2["api_key"], REDACTED);
        assert_eq!(r2["name"], "test");
    }

    #[test]
    fn compiled_redaction_empty_patterns() {
        let config = RedactionConfig { patterns: vec![] };
        let compiled = config.compile();
        let input = json!({"password": "hunter2", "secret": "x"});
        let redacted = compiled.redact(&input);
        assert_eq!(redacted["password"], "hunter2");
        assert_eq!(redacted["secret"], "x");
    }
}