Skip to main content

victauri_plugin/
redaction.rs

1use regex::RegexSet;
2
/// Regular expressions for secrets and PII that are always scrubbed from output.
///
/// The redactor applies these one after another, in the order listed, each to the
/// text left by the previous pattern. Every pattern must be valid `regex` crate syntax.
const BUILTIN_PATTERNS: &[&str] = &[
    // Generic prefixed keys: `sk-…`, `pk_…`, `key-…` followed by 20+ alphanumerics.
    r"(?i)\b(sk|pk|key)[-_][a-zA-Z0-9]{20,}\b",
    // Bearer tokens echoed into output.
    r"(?i)bearer\s+[a-zA-Z0-9\-_.~+/]{20,}",
    // AWS access key IDs.
    r"\bAKIA[0-9A-Z]{16}\b",
    // JWTs: three base64url sections separated by dots, the first starting with `eyJ`.
    r"\beyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\b",
    // Generic long hex secrets (40+ hex chars — SHA-1 hashes, API keys).
    r"\b[0-9a-fA-F]{40,}\b",
    // Email addresses.
    r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
    // Credit card numbers (basic 4x4 digit groups, optionally separated by `-` or space).
    r"\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b",
    // OpenAI-style keys. `-` and `_` are accepted in the body so that prefixed forms
    // such as `sk-proj-…` are matched in full; a purely alphanumeric class stops at the
    // first separator and lets the key through.
    r"\bsk-[a-zA-Z0-9_\-]{32,}\b",
    // Anthropic keys. The body may contain `_` as well as `-`; without `_` in the class
    // the trailing `\b` cannot be satisfied next to an underscore and the key is missed
    // or only partially redacted.
    r"\bsk-ant-[a-zA-Z0-9_\-]{20,}\b",
    // GitHub tokens.
    r"\b(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}\b",
    // Stripe keys.
    r"\b(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{20,}\b",
];
27
/// Key-name fragments that mark a JSON object entry as sensitive.
///
/// Keys are compared case-insensitively and by substring, so an entry here also
/// covers longer key names that contain it.
const SENSITIVE_JSON_KEYS: &[&str] = &[
    // API keys in the common spellings.
    "api_key",
    "apiKey",
    "api-key",
    // Generic secrets and passwords.
    "secret",
    "password",
    "passwd",
    // Tokens.
    "token",
    "access_token",
    "refresh_token",
    // Key material.
    "private_key",
    "privateKey",
    "secret_key",
    "secretKey",
    // Auth and session state.
    "authorization",
    "auth_token",
    "session_token",
    "cookie",
    "credentials",
    // Personal and payment identifiers.
    "ssn",
    "credit_card",
    "card_number",
];
51
52/// Output redactor that scrubs API keys, tokens, emails, and sensitive JSON keys
53/// from MCP tool output. Applies built-in patterns plus optional custom regexes.
54pub struct Redactor {
55    builtin_set: RegexSet,
56    builtin_compiled: Vec<regex::Regex>,
57    custom_set: Option<RegexSet>,
58    custom_compiled: Vec<regex::Regex>,
59}
60
61impl Redactor {
62    /// Build a redactor with custom patterns.
63    ///
64    /// # Errors
65    ///
66    /// Returns [`regex::Error`] if any custom pattern (or a built-in pattern) fails to compile.
67    pub fn try_new(custom_patterns: &[String]) -> Result<Self, regex::Error> {
68        let builtin_set = RegexSet::new(BUILTIN_PATTERNS)?;
69        let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
70            .iter()
71            .filter_map(|p| regex::Regex::new(p).ok())
72            .collect();
73
74        let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
75            (None, Vec::new())
76        } else {
77            let set = RegexSet::new(custom_patterns)?;
78            let compiled: Vec<regex::Regex> = custom_patterns
79                .iter()
80                .map(|p| regex::Regex::new(p))
81                .collect::<Result<Vec<_>, _>>()?;
82            (Some(set), compiled)
83        };
84
85        Ok(Self {
86            builtin_set,
87            builtin_compiled,
88            custom_set,
89            custom_compiled,
90        })
91    }
92
93    /// Build a redactor with custom patterns, logging a warning and skipping any invalid patterns.
94    ///
95    /// # Panics
96    ///
97    /// Panics if the built-in redaction patterns fail to compile (this is a bug).
98    pub fn new(custom_patterns: &[String]) -> Self {
99        let builtin_set =
100            RegexSet::new(BUILTIN_PATTERNS).expect("builtin redaction patterns must compile");
101        let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
102            .iter()
103            .filter_map(|p| regex::Regex::new(p).ok())
104            .collect();
105
106        let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
107            (None, Vec::new())
108        } else {
109            match RegexSet::new(custom_patterns) {
110                Ok(set) => {
111                    let compiled: Vec<regex::Regex> = custom_patterns
112                        .iter()
113                        .filter_map(|p| regex::Regex::new(p).ok())
114                        .collect();
115                    (Some(set), compiled)
116                }
117                Err(e) => {
118                    tracing::warn!("Failed to compile custom redaction patterns: {e}");
119                    (None, Vec::new())
120                }
121            }
122        };
123
124        Self {
125            builtin_set,
126            builtin_compiled,
127            custom_set,
128            custom_compiled,
129        }
130    }
131
132    /// Scrub sensitive data from `input` using regex patterns and JSON-key matching.
133    #[must_use]
134    pub fn redact(&self, input: &str) -> String {
135        let mut output = self.redact_regex(input);
136        output = self.redact_json_keys(&output);
137        output
138    }
139
140    fn redact_regex(&self, input: &str) -> String {
141        let has_builtin = self.builtin_set.is_match(input);
142        let has_custom = self.custom_set.as_ref().is_some_and(|c| c.is_match(input));
143
144        if !has_builtin && !has_custom {
145            return input.to_string();
146        }
147
148        let mut output = input.to_string();
149
150        if has_builtin {
151            for re in &self.builtin_compiled {
152                output = re.replace_all(&output, "[REDACTED]").to_string();
153            }
154        }
155
156        if has_custom {
157            for re in &self.custom_compiled {
158                output = re.replace_all(&output, "[REDACTED]").to_string();
159            }
160        }
161
162        output
163    }
164
165    fn redact_json_keys(&self, input: &str) -> String {
166        if let Ok(value) = serde_json::from_str::<serde_json::Value>(input) {
167            if !json_has_sensitive_keys(&value) {
168                return input.to_string();
169            }
170            let redacted = redact_json_value(&value);
171            serde_json::to_string(&redacted).unwrap_or_else(|_| input.to_string())
172        } else {
173            input.to_string()
174        }
175    }
176}
177
178fn json_has_sensitive_keys(value: &serde_json::Value) -> bool {
179    match value {
180        serde_json::Value::Object(map) => {
181            for (key, val) in map {
182                let lower_key = key.to_lowercase();
183                if SENSITIVE_JSON_KEYS
184                    .iter()
185                    .any(|k| lower_key.contains(&k.to_lowercase()))
186                {
187                    return true;
188                }
189                if json_has_sensitive_keys(val) {
190                    return true;
191                }
192            }
193            false
194        }
195        serde_json::Value::Array(arr) => arr.iter().any(json_has_sensitive_keys),
196        _ => false,
197    }
198}
199
200fn redact_json_value(value: &serde_json::Value) -> serde_json::Value {
201    match value {
202        serde_json::Value::Object(map) => {
203            let mut new_map = serde_json::Map::new();
204            for (key, val) in map {
205                let lower_key = key.to_lowercase();
206                if SENSITIVE_JSON_KEYS
207                    .iter()
208                    .any(|k| lower_key.contains(&k.to_lowercase()))
209                {
210                    if val.is_string() || val.is_number() {
211                        new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
212                    } else if val.is_boolean() {
213                        // Booleans like has_api_key: true are safe — they indicate presence, not value
214                        new_map.insert(key.clone(), val.clone());
215                    } else {
216                        new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
217                    }
218                } else {
219                    new_map.insert(key.clone(), redact_json_value(val));
220                }
221            }
222            serde_json::Value::Object(new_map)
223        }
224        serde_json::Value::Array(arr) => {
225            serde_json::Value::Array(arr.iter().map(redact_json_value).collect())
226        }
227        other => other.clone(),
228    }
229}
230
231impl Default for Redactor {
232    fn default() -> Self {
233        Self::new(&[])
234    }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Placeholder the redactor writes in place of sensitive data.
    const MARKER: &str = "[REDACTED]";

    /// Run a redactor with only the built-in patterns over `input`.
    fn scrub(input: &str) -> String {
        Redactor::default().redact(input)
    }

    #[test]
    fn redacts_api_keys() {
        let output = scrub("key is sk-abc123def456ghi789jkl012mno");
        assert!(output.contains(MARKER));
    }

    #[test]
    fn redacts_bearer_tokens() {
        let output = scrub(
            "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
        );
        assert!(output.contains(MARKER));
        assert!(!output.contains("eyJhbGci"));
    }

    #[test]
    fn redacts_emails() {
        let output = scrub("contact user@example.com for help");
        assert!(output.contains(MARKER));
    }

    #[test]
    fn passes_through_clean_text() {
        let input = r#"{"ok": true, "title": "My App"}"#;
        assert_eq!(scrub(input), input);
    }

    #[test]
    fn custom_patterns_work() {
        let patterns = ["secret_\\w+".to_string()];
        let output = Redactor::new(&patterns).redact("found secret_project_alpha here");
        assert!(output.contains(MARKER));
    }

    #[test]
    fn redacts_json_sensitive_keys() {
        let output = scrub(r#"{"api_key":"sk-test-12345","name":"John","token":"abc123"}"#);
        assert!(output.contains(MARKER));
        assert!(output.contains("John"));
        assert!(!output.contains("sk-test-12345"));
    }

    #[test]
    fn preserves_boolean_sensitive_keys() {
        let output = scrub(r#"{"has_api_key":true,"api_key":"secret-value-here"}"#);
        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
        assert_eq!(parsed["has_api_key"], serde_json::Value::Bool(true));
        assert_eq!(parsed["api_key"], serde_json::Value::String(MARKER.into()));
    }

    #[test]
    fn redacts_nested_json_keys() {
        let output = scrub(r#"{"config":{"llm":{"api_key":"sk-live-xxx","model":"gpt-4"}}}"#);
        assert!(output.contains(MARKER));
        assert!(output.contains("gpt-4"));
        assert!(!output.contains("sk-live-xxx"));
    }

    #[test]
    fn redacts_github_tokens() {
        let output = scrub("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno");
        assert!(output.contains(MARKER));
    }

    #[test]
    fn redacts_stripe_keys() {
        let output = scrub("sk_test_ABCDEFGHIJKLMNOPQRSTUVWXYZab");
        assert!(output.contains(MARKER));
    }

    #[test]
    fn try_new_valid_patterns() {
        let patterns = ["secret_\\w+".to_string()];
        let built = Redactor::try_new(&patterns);
        assert!(built.is_ok());
        let redactor = built.unwrap();
        assert!(redactor.redact("found secret_alpha here").contains(MARKER));
    }

    #[test]
    fn try_new_invalid_pattern_returns_error() {
        let patterns = ["[invalid".to_string()];
        assert!(Redactor::try_new(&patterns).is_err());
    }

    #[test]
    fn try_new_empty_patterns() {
        assert!(Redactor::try_new(&[]).is_ok());
    }
}