Skip to main content

victauri_plugin/
redaction.rs

1use regex::RegexSet;
2
/// Regex sources for the secrets that are always redacted.
///
/// The list is compiled twice by `Redactor`: once as a `RegexSet` (cheap
/// "does anything match?" pre-check) and once as individual regexes that are
/// applied in the order listed here, each replacing its matches with
/// `[REDACTED]`.
const BUILTIN_PATTERNS: &[&str] = &[
    // API keys: sk-..., pk-..., key-... (case-insensitive prefix, `-` or `_`
    // separator, then 20+ alphanumeric chars)
    r"(?i)\b(sk|pk|key)[-_][a-zA-Z0-9]{20,}\b",
    // Bearer tokens in output: the word "bearer" (any case), whitespace, then
    // 20+ token characters
    r"(?i)bearer\s+[a-zA-Z0-9\-_.~+/]{20,}",
    // AWS keys: "AKIA" followed by exactly 16 uppercase letters/digits
    r"\bAKIA[0-9A-Z]{16}\b",
    // JWT tokens (3 base64url sections separated by dots, first one starting "eyJ")
    r"\beyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\b",
    // Generic long hex secrets (40+ hex chars — SHA1 hashes, API keys)
    r"\b[0-9a-fA-F]{40,}\b",
    // Email addresses
    r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
    // Credit card numbers (basic pattern: four groups of 4 digits, each
    // optionally separated by one space or hyphen; no checksum validation)
    r"\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b",
    // OpenAI-style keys: "sk-" followed by 32+ alphanumeric chars
    r"\bsk-[a-zA-Z0-9]{32,}\b",
    // Anthropic keys: "sk-ant-" followed by 20+ alphanumeric or hyphen chars
    r"\bsk-ant-[a-zA-Z0-9\-]{20,}\b",
    // GitHub tokens: ghp_/gho_/ghu_/ghs_/ghr_ followed by 36+ alphanumeric chars
    r"\b(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}\b",
    // Stripe keys: sk_/pk_/rk_ + test_/live_ + 20+ alphanumeric chars
    r"\b(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{20,}\b",
];
27
/// JSON object key fragments whose values are treated as secrets.
///
/// Both the entry and the candidate key are lowercased before comparison, and
/// the test is a *substring* match (`key.contains(entry)`), so `"api_key"`
/// also covers keys such as `"has_api_key"` or `"OPENAI_API_KEY"`.
///
/// NOTE(review): because matching is by substring, short entries can hit
/// unrelated keys — e.g. `"ssn"` is contained in a lowercased `"className"`.
const SENSITIVE_JSON_KEYS: &[&str] = &[
    "api_key",
    "apiKey",
    "api-key",
    "secret",
    "password",
    "passwd",
    "token",
    "access_token",
    "refresh_token",
    "private_key",
    "privateKey",
    "secret_key",
    "secretKey",
    "authorization",
    "auth_token",
    "session_token",
    "cookie",
    "credentials",
    "ssn",
    "credit_card",
    "card_number",
];
51
/// Output redactor that scrubs API keys, tokens, emails, and sensitive JSON keys
/// from MCP tool output. Applies built-in patterns plus optional custom regexes.
///
/// Each pattern list is held in two forms: a `RegexSet` used only to ask
/// "does any pattern match this input?", and the individually compiled
/// regexes that perform the actual replacement.
pub struct Redactor {
    /// Set over the built-in patterns, used as a pre-check before replacing.
    builtin_set: RegexSet,
    /// Individually compiled built-in patterns, applied in declaration order.
    builtin_compiled: Vec<regex::Regex>,
    /// Set over the caller-supplied patterns; `None` when no custom patterns
    /// are in effect.
    custom_set: Option<RegexSet>,
    /// Individually compiled caller-supplied patterns, applied after the
    /// built-in ones.
    custom_compiled: Vec<regex::Regex>,
}
60
61impl Redactor {
62    /// Build a redactor with custom patterns.
63    ///
64    /// # Errors
65    ///
66    /// Returns [`regex::Error`] if any custom pattern (or a built-in pattern) fails to compile.
67    pub fn try_new(custom_patterns: &[String]) -> Result<Self, regex::Error> {
68        let builtin_set = RegexSet::new(BUILTIN_PATTERNS)?;
69        let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
70            .iter()
71            .filter_map(|p| regex::Regex::new(p).ok())
72            .collect();
73
74        let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
75            (None, Vec::new())
76        } else {
77            let set = RegexSet::new(custom_patterns)?;
78            let compiled: Vec<regex::Regex> = custom_patterns
79                .iter()
80                .map(|p| regex::Regex::new(p))
81                .collect::<Result<Vec<_>, _>>()?;
82            (Some(set), compiled)
83        };
84
85        Ok(Self {
86            builtin_set,
87            builtin_compiled,
88            custom_set,
89            custom_compiled,
90        })
91    }
92
93    /// Build a redactor with custom patterns, logging a warning and skipping any invalid patterns.
94    ///
95    /// If the built-in redaction patterns fail to compile (a bug), falls back to
96    /// an empty set and logs an error rather than panicking.
97    pub fn new(custom_patterns: &[String]) -> Self {
98        let (builtin_set, builtin_compiled) = match RegexSet::new(BUILTIN_PATTERNS) {
99            Ok(set) => {
100                let compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
101                    .iter()
102                    .filter_map(|p| regex::Regex::new(p).ok())
103                    .collect();
104                (set, compiled)
105            }
106            Err(e) => {
107                tracing::error!(
108                    "BUG: built-in redaction patterns failed to compile: {e}. \
109                     Redaction will be disabled."
110                );
111                // Fall back to an empty set so the process survives.
112                // An empty RegexSet always compiles successfully.
113                let empty: Vec<String> = Vec::new();
114                let empty_set = RegexSet::new(&empty).unwrap_or_else(|_| unreachable!());
115                (empty_set, Vec::new())
116            }
117        };
118
119        let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
120            (None, Vec::new())
121        } else {
122            match RegexSet::new(custom_patterns) {
123                Ok(set) => {
124                    let compiled: Vec<regex::Regex> = custom_patterns
125                        .iter()
126                        .filter_map(|p| regex::Regex::new(p).ok())
127                        .collect();
128                    (Some(set), compiled)
129                }
130                Err(e) => {
131                    tracing::warn!("Failed to compile custom redaction patterns: {e}");
132                    (None, Vec::new())
133                }
134            }
135        };
136
137        Self {
138            builtin_set,
139            builtin_compiled,
140            custom_set,
141            custom_compiled,
142        }
143    }
144
145    /// Scrub sensitive data from `input` using regex patterns and JSON-key matching.
146    #[must_use]
147    pub fn redact(&self, input: &str) -> String {
148        let mut output = self.redact_regex(input);
149        output = self.redact_json_keys(&output);
150        output
151    }
152
153    fn redact_regex(&self, input: &str) -> String {
154        let has_builtin = self.builtin_set.is_match(input);
155        let has_custom = self.custom_set.as_ref().is_some_and(|c| c.is_match(input));
156
157        if !has_builtin && !has_custom {
158            return input.to_string();
159        }
160
161        let mut output = input.to_string();
162
163        if has_builtin {
164            for re in &self.builtin_compiled {
165                output = re.replace_all(&output, "[REDACTED]").to_string();
166            }
167        }
168
169        if has_custom {
170            for re in &self.custom_compiled {
171                output = re.replace_all(&output, "[REDACTED]").to_string();
172            }
173        }
174
175        output
176    }
177
178    fn redact_json_keys(&self, input: &str) -> String {
179        if let Ok(value) = serde_json::from_str::<serde_json::Value>(input) {
180            if !json_has_sensitive_keys(&value) {
181                return input.to_string();
182            }
183            let redacted = redact_json_value(&value);
184            serde_json::to_string(&redacted).unwrap_or_else(|_| input.to_string())
185        } else {
186            input.to_string()
187        }
188    }
189}
190
191fn json_has_sensitive_keys(value: &serde_json::Value) -> bool {
192    match value {
193        serde_json::Value::Object(map) => {
194            for (key, val) in map {
195                let lower_key = key.to_lowercase();
196                if SENSITIVE_JSON_KEYS
197                    .iter()
198                    .any(|k| lower_key.contains(&k.to_lowercase()))
199                {
200                    return true;
201                }
202                if json_has_sensitive_keys(val) {
203                    return true;
204                }
205            }
206            false
207        }
208        serde_json::Value::Array(arr) => arr.iter().any(json_has_sensitive_keys),
209        _ => false,
210    }
211}
212
213fn redact_json_value(value: &serde_json::Value) -> serde_json::Value {
214    match value {
215        serde_json::Value::Object(map) => {
216            let mut new_map = serde_json::Map::new();
217            for (key, val) in map {
218                let lower_key = key.to_lowercase();
219                if SENSITIVE_JSON_KEYS
220                    .iter()
221                    .any(|k| lower_key.contains(&k.to_lowercase()))
222                {
223                    if val.is_string() || val.is_number() {
224                        new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
225                    } else if val.is_boolean() {
226                        // Booleans like has_api_key: true are safe — they indicate presence, not value
227                        new_map.insert(key.clone(), val.clone());
228                    } else {
229                        new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
230                    }
231                } else {
232                    new_map.insert(key.clone(), redact_json_value(val));
233                }
234            }
235            serde_json::Value::Object(new_map)
236        }
237        serde_json::Value::Array(arr) => {
238            serde_json::Value::Array(arr.iter().map(redact_json_value).collect())
239        }
240        other => other.clone(),
241    }
242}
243
244impl Default for Redactor {
245    fn default() -> Self {
246        Self::new(&[])
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    // --- regex-based redaction -------------------------------------------

    #[test]
    fn redacts_api_keys() {
        let redactor = Redactor::default();
        let scrubbed = redactor.redact("key is sk-abc123def456ghi789jkl012mno");
        assert!(scrubbed.contains("[REDACTED]"));
    }

    #[test]
    fn redacts_bearer_tokens() {
        let redactor = Redactor::default();
        let scrubbed = redactor.redact(
            "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
        );
        assert!(scrubbed.contains("[REDACTED]"));
        assert!(!scrubbed.contains("eyJhbGci"));
    }

    #[test]
    fn redacts_emails() {
        let redactor = Redactor::default();
        let scrubbed = redactor.redact("contact user@example.com for help");
        assert!(scrubbed.contains("[REDACTED]"));
    }

    #[test]
    fn passes_through_clean_text() {
        // Nothing sensitive: the text must come back byte-for-byte.
        let clean = r#"{"ok": true, "title": "My App"}"#;
        assert_eq!(Redactor::default().redact(clean), clean);
    }

    #[test]
    fn custom_patterns_work() {
        let patterns = ["secret_\\w+".to_string()];
        let redactor = Redactor::new(&patterns);
        let scrubbed = redactor.redact("found secret_project_alpha here");
        assert!(scrubbed.contains("[REDACTED]"));
    }

    // --- JSON key redaction ----------------------------------------------

    #[test]
    fn redacts_json_sensitive_keys() {
        let redactor = Redactor::default();
        let scrubbed =
            redactor.redact(r#"{"api_key":"sk-test-12345","name":"John","token":"abc123"}"#);
        assert!(scrubbed.contains("[REDACTED]"));
        assert!(scrubbed.contains("John"));
        assert!(!scrubbed.contains("sk-test-12345"));
    }

    #[test]
    fn preserves_boolean_sensitive_keys() {
        let redactor = Redactor::default();
        let scrubbed = redactor.redact(r#"{"has_api_key":true,"api_key":"secret-value-here"}"#);
        let doc: serde_json::Value = serde_json::from_str(&scrubbed).unwrap();

        let expected_flag = serde_json::Value::Bool(true);
        let expected_secret = serde_json::Value::String("[REDACTED]".into());
        assert_eq!(doc["has_api_key"], expected_flag);
        assert_eq!(doc["api_key"], expected_secret);
    }

    #[test]
    fn redacts_nested_json_keys() {
        let redactor = Redactor::default();
        let scrubbed =
            redactor.redact(r#"{"config":{"llm":{"api_key":"sk-live-xxx","model":"gpt-4"}}}"#);
        assert!(scrubbed.contains("[REDACTED]"));
        assert!(scrubbed.contains("gpt-4"));
        assert!(!scrubbed.contains("sk-live-xxx"));
    }

    // --- vendor-specific token formats -----------------------------------

    #[test]
    fn redacts_github_tokens() {
        let redactor = Redactor::default();
        let scrubbed = redactor.redact("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno");
        assert!(scrubbed.contains("[REDACTED]"));
    }

    #[test]
    fn redacts_stripe_keys() {
        let redactor = Redactor::default();
        let scrubbed = redactor.redact("sk_test_ABCDEFGHIJKLMNOPQRSTUVWXYZab");
        assert!(scrubbed.contains("[REDACTED]"));
    }

    // --- fallible constructor --------------------------------------------

    #[test]
    fn try_new_valid_patterns() {
        let built = Redactor::try_new(&["secret_\\w+".to_string()]);
        assert!(built.is_ok());
        let redactor = built.unwrap();
        assert!(
            redactor
                .redact("found secret_alpha here")
                .contains("[REDACTED]")
        );
    }

    #[test]
    fn try_new_invalid_pattern_returns_error() {
        let built = Redactor::try_new(&["[invalid".to_string()]);
        assert!(built.is_err());
    }

    #[test]
    fn try_new_empty_patterns() {
        let built = Redactor::try_new(&[]);
        assert!(built.is_ok());
    }
}
367}