Skip to main content

arbiter_audit/
redaction.rs

1//! Argument redaction for sensitive fields.
2//!
3//! Walks a JSON value tree and replaces any object key matching a configured
4//! pattern with `"[REDACTED]"`. Patterns use case-insensitive word-boundary
5//! matching (letters only) compiled to regexes, so `key` matches `api_key`
6//! but not `monkey` or `keyboard`.
7
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10
/// Placeholder text inserted in place of redacted values.
///
/// A redacted field's value is always replaced by a JSON string holding this
/// text, whatever the type of the original value was.
pub const REDACTED: &str = "[REDACTED]";
13
/// Configuration for argument redaction.
///
/// Holds the key patterns only; use [`RedactionConfig::compile`] to turn them
/// into a reusable [`CompiledRedaction`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RedactionConfig {
    /// Case-insensitive patterns matched as literal text against JSON object
    /// keys using letter-boundary matching. An occurrence of a pattern counts
    /// as a match only when neither the character immediately before it nor
    /// the character immediately after it is an ASCII letter (underscores,
    /// hyphens, digits, and the start/end of the key all act as separators).
    pub patterns: Vec<String>,
}
23
24impl Default for RedactionConfig {
25    fn default() -> Self {
26        // Expanded default redaction patterns to cover common variants.
27        Self {
28            patterns: vec![
29                "password".into(),
30                "passwd".into(),
31                "pwd".into(),
32                "token".into(),
33                "access_token".into(),
34                "refresh_token".into(),
35                "secret".into(),
36                "client_secret".into(),
37                "key".into(),
38                "api_key".into(),
39                "apikey".into(),
40                "api-key".into(),
41                "authorization".into(),
42                "auth".into(),
43                "credential".into(),
44                "cred".into(),
45                "private".into(),
46                "private_key".into(),
47                "ssn".into(),
48                "social_security".into(),
49                "credit_card".into(),
50                "card_number".into(),
51                "cvv".into(),
52                "cvc".into(),
53            ],
54        }
55    }
56}
57
/// Pre-compiled redaction patterns for efficient per-request redaction.
///
/// Compile once at startup via [`RedactionConfig::compile`] and reuse across
/// requests. The convenience wrapper [`redact_arguments`] compiles the
/// patterns again on every call, which adds avoidable CPU overhead under
/// load; holding on to a `CompiledRedaction` sidesteps that cost.
#[derive(Debug, Clone)]
pub struct CompiledRedaction {
    // One case-insensitive regex per configured pattern, built by
    // `RedactionConfig::compile`.
    patterns: Vec<Regex>,
}
67
68impl CompiledRedaction {
69    /// Redact sensitive fields using pre-compiled patterns.
70    pub fn redact(&self, value: &serde_json::Value) -> serde_json::Value {
71        redact_value(value, &self.patterns)
72    }
73}
74
75impl RedactionConfig {
76    /// Pre-compile patterns into regexes for reuse across requests.
77    pub fn compile(&self) -> CompiledRedaction {
78        let patterns = self
79            .patterns
80            .iter()
81            .filter_map(|p| Regex::new(&format!("(?i){}", regex::escape(p))).ok())
82            .collect();
83        CompiledRedaction { patterns }
84    }
85}
86
87/// Redact sensitive fields in a JSON value based on the given configuration.
88///
89/// Object keys matching any pattern (case-insensitive, letter-boundary) have
90/// their values replaced with `"[REDACTED]"`. The walk is recursive through
91/// objects and arrays.
92///
93/// For hot-path usage, prefer [`RedactionConfig::compile`] + [`CompiledRedaction::redact`]
94/// to avoid recompiling regexes on every call.
95pub fn redact_arguments(value: &serde_json::Value, config: &RedactionConfig) -> serde_json::Value {
96    config.compile().redact(value)
97}
98
99/// Check whether a pattern match has letter-boundaries: the characters
100/// immediately before and after the match must NOT be ASCII letters.
101/// This prevents `key` from matching inside `monkey` or `keyboard`,
102/// while still matching `api_key`, `api-key`, or standalone `key`.
103fn has_letter_boundary_match(key: &str, pattern: &Regex) -> bool {
104    for m in pattern.find_iter(key) {
105        let before = key[..m.start()].chars().next_back();
106        let after = key[m.end()..].chars().next();
107        let preceded_by_letter = before.is_some_and(|c| c.is_ascii_alphabetic());
108        let followed_by_letter = after.is_some_and(|c| c.is_ascii_alphabetic());
109        if !preceded_by_letter && !followed_by_letter {
110            return true;
111        }
112    }
113    false
114}
115
116fn redact_value(value: &serde_json::Value, patterns: &[Regex]) -> serde_json::Value {
117    match value {
118        serde_json::Value::Object(map) => {
119            let mut redacted = serde_json::Map::new();
120            for (k, v) in map {
121                if patterns.iter().any(|p| has_letter_boundary_match(k, p)) {
122                    redacted.insert(k.clone(), serde_json::Value::String(REDACTED.into()));
123                } else {
124                    redacted.insert(k.clone(), redact_value(v, patterns));
125                }
126            }
127            serde_json::Value::Object(redacted)
128        }
129        serde_json::Value::Array(arr) => {
130            serde_json::Value::Array(arr.iter().map(|v| redact_value(v, patterns)).collect())
131        }
132        other => other.clone(),
133    }
134}
135
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Default patterns redact sensitive keys at the top level and inside a
    /// nested object, while leaving unrelated keys untouched.
    #[test]
    fn redacts_sensitive_fields() {
        let config = RedactionConfig::default();
        let input = json!({
            "path": "/etc/hosts",
            "api_key": "sk-12345",
            "password": "hunter2",
            "nested": {
                "access_token": "abc",
                "count": 42
            }
        });

        let redacted = redact_arguments(&input, &config);

        assert_eq!(redacted["path"], "/etc/hosts");
        assert_eq!(redacted["api_key"], REDACTED);
        assert_eq!(redacted["password"], REDACTED);
        assert_eq!(redacted["nested"]["access_token"], REDACTED);
        assert_eq!(redacted["nested"]["count"], 42);
    }

    /// A lowercase pattern matches keys regardless of their letter case.
    #[test]
    fn redaction_is_case_insensitive() {
        let config = RedactionConfig {
            patterns: vec!["secret".into()],
        };
        let input = json!({
            "SECRET_VALUE": "classified",
            "my_Secret": "also classified",
            "public": "visible"
        });

        let redacted = redact_arguments(&input, &config);

        assert_eq!(redacted["SECRET_VALUE"], REDACTED);
        assert_eq!(redacted["my_Secret"], REDACTED);
        assert_eq!(redacted["public"], "visible");
    }

    /// Objects that are elements of an array are redacted as well.
    #[test]
    fn redacts_inside_arrays() {
        let config = RedactionConfig {
            patterns: vec!["token".into()],
        };
        let input = json!([
            {"token": "abc", "id": 1},
            {"token": "def", "id": 2}
        ]);

        let redacted = redact_arguments(&input, &config);
        let arr = redacted.as_array().unwrap();

        assert_eq!(arr[0]["token"], REDACTED);
        assert_eq!(arr[0]["id"], 1);
        assert_eq!(arr[1]["token"], REDACTED);
    }

    /// With no patterns configured, nothing is redacted.
    #[test]
    fn empty_patterns_redact_nothing() {
        let config = RedactionConfig { patterns: vec![] };
        let input = json!({"password": "hunter2", "secret": "x"});
        let redacted = redact_arguments(&input, &config);

        assert_eq!(redacted["password"], "hunter2");
        assert_eq!(redacted["secret"], "x");
    }

    /// Top-level scalars have no keys, so they are returned unchanged.
    #[test]
    fn scalar_values_pass_through() {
        let config = RedactionConfig::default();
        let input = json!("just a string");
        assert_eq!(redact_arguments(&input, &config), json!("just a string"));

        let input = json!(42);
        assert_eq!(redact_arguments(&input, &config), json!(42));
    }

    // -----------------------------------------------------------------------
    // Letter-boundary matching (no substring over-match)
    // -----------------------------------------------------------------------

    /// Redaction uses word-boundary matching (letter-boundary): pattern "key"
    /// matches "api_key" and "key_id" (separated by underscore) but NOT
    /// "monkey" or "keyboard" (embedded in other letters).
    #[test]
    fn redaction_is_word_boundary_match() {
        let config = RedactionConfig {
            patterns: vec!["key".into()],
        };
        let input = json!({
            "api_key": "secret-1",
            "key_id": "secret-2",
            "monkey": "banana",
            "keyboard": "qwerty",
            "unrelated": "visible"
        });

        let redacted = redact_arguments(&input, &config);

        // Fields where "key" appears at a word boundary are redacted.
        assert_eq!(
            redacted["api_key"], REDACTED,
            "api_key has 'key' at boundary"
        );
        assert_eq!(redacted["key_id"], REDACTED, "key_id has 'key' at boundary");

        // Fields where "key" is embedded in other letters are NOT redacted.
        assert_eq!(
            redacted["monkey"], "banana",
            "monkey should not be redacted"
        );
        assert_eq!(
            redacted["keyboard"], "qwerty",
            "keyboard should not be redacted"
        );

        // Fields without "key" are left alone.
        assert_eq!(redacted["unrelated"], "visible");
    }

    /// Pattern "token" does NOT match "tokelau_island" or "notation": neither
    /// key contains the text "token" (they merely share some letters with it).
    /// It DOES match "access_token" and "token_type", where "token" is
    /// delimited by an underscore or by the start/end of the key.
    #[test]
    fn redaction_does_not_match_unrelated() {
        let config = RedactionConfig {
            patterns: vec!["token".into()],
        };
        let input = json!({
            "access_token": "secret",
            "token_type": "bearer",
            "tokelau_island": "pacific",
            "notation": "musical"
        });

        let redacted = redact_arguments(&input, &config);

        // "access_token" and "token_type" contain "token" at a letter
        // boundary -> redacted.
        assert_eq!(redacted["access_token"], REDACTED);
        assert_eq!(redacted["token_type"], REDACTED);

        // "tokelau_island" does NOT contain "token" -> NOT redacted.
        assert_eq!(redacted["tokelau_island"], "pacific");

        // "notation" does NOT contain "token" -> NOT redacted.
        assert_eq!(redacted["notation"], "musical");
    }

    // -----------------------------------------------------------------------
    // Nested JSON redaction (10 levels deep)
    // -----------------------------------------------------------------------

    /// A sensitive key buried under 10 levels of nested objects is still
    /// redacted, and its non-sensitive sibling is preserved.
    #[test]
    fn deeply_nested_json_redaction() {
        let config = RedactionConfig {
            patterns: vec!["secret".into()],
        };

        // Build 10 levels of nesting: {"level": {"level": ... {"secret": "value"}}}
        let mut value = json!({"secret": "deep-secret-value", "visible": "ok"});
        for _ in 0..10 {
            value = json!({"level": value});
        }

        let redacted = redact_arguments(&value, &config);

        // Walk down 10 levels to verify the deeply nested "secret" was redacted.
        let mut current = &redacted;
        for _ in 0..10 {
            current = &current["level"];
        }
        assert_eq!(
            current["secret"], REDACTED,
            "deeply nested 'secret' field must be redacted"
        );
        assert_eq!(
            current["visible"], "ok",
            "non-secret field at depth must be preserved"
        );
    }

    /// With the default patterns, keys that merely embed a pattern inside a
    /// longer run of letters ("key" in "keyboard", "auth" in "author") are
    /// left alone.
    #[test]
    fn does_not_redact_non_sensitive_substrings() {
        let config = RedactionConfig::default();
        let input = json!({
            "keyboard": "mechanical",
            "monkey": "curious george",
            "author": "Jane Doe",
            "authenticate_method": "oauth2"
        });
        let redacted = redact_arguments(&input, &config);
        assert_eq!(
            redacted["keyboard"], "mechanical",
            "keyboard should not be redacted"
        );
        assert_eq!(
            redacted["monkey"], "curious george",
            "monkey should not be redacted"
        );
        assert_eq!(
            redacted["author"], "Jane Doe",
            "author should not be redacted"
        );
        assert_eq!(
            redacted["authenticate_method"], "oauth2",
            "authenticate_method should not be redacted"
        );
    }

    /// With the default patterns, compound keys joined by underscores or
    /// hyphens are still redacted.
    #[test]
    fn still_redacts_sensitive_compound_fields() {
        let config = RedactionConfig::default();
        let input = json!({
            "api_key": "sk-12345",
            "api-key": "sk-67890",
            "x-auth-token": "bearer-abc",
            "user_password": "hunter2"
        });
        let redacted = redact_arguments(&input, &config);
        assert_eq!(redacted["api_key"], "[REDACTED]");
        assert_eq!(redacted["api-key"], "[REDACTED]");
        assert_eq!(redacted["x-auth-token"], "[REDACTED]");
        assert_eq!(redacted["user_password"], "[REDACTED]");
    }

    // ── RT-206: CompiledRedaction direct tests ────────────────────────

    /// The pre-compiled path and the `redact_arguments` wrapper produce the
    /// same output for the same input and configuration.
    #[test]
    fn compiled_redaction_matches_redact_arguments() {
        let config = RedactionConfig::default();
        let compiled = config.compile();
        let input = json!({
            "path": "/etc/hosts",
            "api_key": "sk-12345",
            "password": "hunter2",
            "nested": {
                "access_token": "abc",
                "count": 42
            }
        });

        let result_compiled = compiled.redact(&input);
        let result_wrapper = redact_arguments(&input, &config);
        assert_eq!(
            result_compiled, result_wrapper,
            "compiled and wrapper should produce identical output"
        );
    }

    /// One `CompiledRedaction` can be applied to several inputs in turn.
    #[test]
    fn compiled_redaction_reusable_across_calls() {
        let config = RedactionConfig {
            patterns: vec!["secret".into(), "key".into()],
        };
        let compiled = config.compile();

        let input1 = json!({"secret": "val1", "public": "ok"});
        let input2 = json!({"api_key": "val2", "name": "test"});

        let r1 = compiled.redact(&input1);
        let r2 = compiled.redact(&input2);

        assert_eq!(r1["secret"], REDACTED);
        assert_eq!(r1["public"], "ok");
        assert_eq!(r2["api_key"], REDACTED);
        assert_eq!(r2["name"], "test");
    }

    /// Compiling an empty pattern list yields a redactor that changes nothing.
    #[test]
    fn compiled_redaction_empty_patterns() {
        let config = RedactionConfig { patterns: vec![] };
        let compiled = config.compile();
        let input = json!({"password": "hunter2", "secret": "x"});
        let redacted = compiled.redact(&input);
        assert_eq!(redacted["password"], "hunter2");
        assert_eq!(redacted["secret"], "x");
    }
}