Skip to main content

victauri_plugin/
redaction.rs

1use regex::RegexSet;
2
/// Regular expressions for well-known secret and PII formats.
///
/// `Redactor` compiles this list twice: once as a `RegexSet` and once as
/// individual `Regex` values that are run one after another, in the order
/// listed here. Patterns prefixed with `(?i)` match case-insensitively; the
/// rest are case-sensitive.
const BUILTIN_PATTERNS: &[&str] = &[
    // API keys: sk-..., pk-..., key-... (20+ chars)
    r"(?i)\b(sk|pk|key)[-_][a-zA-Z0-9]{20,}\b",
    // Bearer tokens in output
    r"(?i)bearer\s+[a-zA-Z0-9\-_.~+/]{20,}",
    // AWS keys
    r"\bAKIA[0-9A-Z]{16}\b",
    // JWT tokens (3 base64 sections separated by dots)
    r"\beyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\b",
    // Generic long hex secrets (40+ hex chars — SHA1 hashes, API keys)
    r"\b[0-9a-fA-F]{40,}\b",
    // Email addresses
    r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
    // Credit card numbers (basic patterns)
    // Matches any 16 digits in groups of four, optionally separated by '-' or ' '.
    r"\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b",
    // OpenAI-style keys
    // NOTE(review): anything matching this also matches the generic `sk-` rule above.
    r"\bsk-[a-zA-Z0-9]{32,}\b",
    // Anthropic keys
    r"\bsk-ant-[a-zA-Z0-9\-]{20,}\b",
    // GitHub tokens
    r"\b(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}\b",
    // Stripe keys
    r"\b(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{20,}\b",
];
27
/// JSON object key fragments whose values are treated as sensitive.
///
/// The JSON helpers in this file lowercase both the object key and each entry
/// here, then test whether the key *contains* the entry. Matching is therefore
/// case-insensitive and substring-based: `"token"` also matches keys such as
/// `"access_token"` or `"has_token"`.
const SENSITIVE_JSON_KEYS: &[&str] = &[
    "api_key",
    "apiKey",
    "api-key",
    "secret",
    "password",
    "passwd",
    "token",
    "access_token",
    "refresh_token",
    "private_key",
    "privateKey",
    "secret_key",
    "secretKey",
    "authorization",
    "auth_token",
    "session_token",
    "cookie",
    "credentials",
    "ssn",
    "credit_card",
    "card_number",
];
51
/// Output redactor that scrubs API keys, tokens, emails, and sensitive JSON keys
/// from MCP tool output. Applies built-in patterns plus optional custom regexes.
///
/// Each pattern list is stored twice: as a `RegexSet`, queried with `is_match`
/// to decide whether any replacement work is needed, and as individual
/// `Regex` values, which perform the actual `replace_all` substitutions.
pub struct Redactor {
    /// Set over all built-in patterns, used only as a match prefilter.
    builtin_set: RegexSet,
    /// Built-in patterns compiled one by one, used for replacement.
    builtin_compiled: Vec<regex::Regex>,
    /// Prefilter set over the custom patterns, if one was built.
    custom_set: Option<RegexSet>,
    /// Custom patterns compiled one by one, used for replacement.
    custom_compiled: Vec<regex::Regex>,
}
60
61impl Redactor {
62    /// Build a redactor with custom patterns, returning an error if any pattern is invalid.
63    pub fn try_new(custom_patterns: &[String]) -> Result<Self, regex::Error> {
64        let builtin_set = RegexSet::new(BUILTIN_PATTERNS)?;
65        let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
66            .iter()
67            .filter_map(|p| regex::Regex::new(p).ok())
68            .collect();
69
70        let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
71            (None, Vec::new())
72        } else {
73            let set = RegexSet::new(custom_patterns)?;
74            let compiled: Vec<regex::Regex> = custom_patterns
75                .iter()
76                .map(|p| regex::Regex::new(p))
77                .collect::<Result<Vec<_>, _>>()?;
78            (Some(set), compiled)
79        };
80
81        Ok(Self {
82            builtin_set,
83            builtin_compiled,
84            custom_set,
85            custom_compiled,
86        })
87    }
88
89    /// Build a redactor with custom patterns, logging a warning and skipping any invalid patterns.
90    pub fn new(custom_patterns: &[String]) -> Self {
91        let builtin_set =
92            RegexSet::new(BUILTIN_PATTERNS).expect("builtin redaction patterns must compile");
93        let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
94            .iter()
95            .filter_map(|p| regex::Regex::new(p).ok())
96            .collect();
97
98        let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
99            (None, Vec::new())
100        } else {
101            match RegexSet::new(custom_patterns) {
102                Ok(set) => {
103                    let compiled: Vec<regex::Regex> = custom_patterns
104                        .iter()
105                        .filter_map(|p| regex::Regex::new(p).ok())
106                        .collect();
107                    (Some(set), compiled)
108                }
109                Err(e) => {
110                    tracing::warn!("Failed to compile custom redaction patterns: {e}");
111                    (None, Vec::new())
112                }
113            }
114        };
115
116        Self {
117            builtin_set,
118            builtin_compiled,
119            custom_set,
120            custom_compiled,
121        }
122    }
123
124    /// Scrub sensitive data from `input` using regex patterns and JSON-key matching.
125    pub fn redact(&self, input: &str) -> String {
126        let mut output = self.redact_regex(input);
127        output = self.redact_json_keys(&output);
128        output
129    }
130
131    fn redact_regex(&self, input: &str) -> String {
132        let has_builtin = self.builtin_set.is_match(input);
133        let has_custom = self.custom_set.as_ref().is_some_and(|c| c.is_match(input));
134
135        if !has_builtin && !has_custom {
136            return input.to_string();
137        }
138
139        let mut output = input.to_string();
140
141        if has_builtin {
142            for re in &self.builtin_compiled {
143                output = re.replace_all(&output, "[REDACTED]").to_string();
144            }
145        }
146
147        if has_custom {
148            for re in &self.custom_compiled {
149                output = re.replace_all(&output, "[REDACTED]").to_string();
150            }
151        }
152
153        output
154    }
155
156    fn redact_json_keys(&self, input: &str) -> String {
157        if let Ok(value) = serde_json::from_str::<serde_json::Value>(input) {
158            if !json_has_sensitive_keys(&value) {
159                return input.to_string();
160            }
161            let redacted = redact_json_value(&value);
162            serde_json::to_string(&redacted).unwrap_or_else(|_| input.to_string())
163        } else {
164            input.to_string()
165        }
166    }
167}
168
169fn json_has_sensitive_keys(value: &serde_json::Value) -> bool {
170    match value {
171        serde_json::Value::Object(map) => {
172            for (key, val) in map {
173                let lower_key = key.to_lowercase();
174                if SENSITIVE_JSON_KEYS
175                    .iter()
176                    .any(|k| lower_key.contains(&k.to_lowercase()))
177                {
178                    return true;
179                }
180                if json_has_sensitive_keys(val) {
181                    return true;
182                }
183            }
184            false
185        }
186        serde_json::Value::Array(arr) => arr.iter().any(json_has_sensitive_keys),
187        _ => false,
188    }
189}
190
191fn redact_json_value(value: &serde_json::Value) -> serde_json::Value {
192    match value {
193        serde_json::Value::Object(map) => {
194            let mut new_map = serde_json::Map::new();
195            for (key, val) in map {
196                let lower_key = key.to_lowercase();
197                if SENSITIVE_JSON_KEYS
198                    .iter()
199                    .any(|k| lower_key.contains(&k.to_lowercase()))
200                {
201                    if val.is_string() || val.is_number() {
202                        new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
203                    } else if val.is_boolean() {
204                        // Booleans like has_api_key: true are safe — they indicate presence, not value
205                        new_map.insert(key.clone(), val.clone());
206                    } else {
207                        new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
208                    }
209                } else {
210                    new_map.insert(key.clone(), redact_json_value(val));
211                }
212            }
213            serde_json::Value::Object(new_map)
214        }
215        serde_json::Value::Array(arr) => {
216            serde_json::Value::Array(arr.iter().map(redact_json_value).collect())
217        }
218        other => other.clone(),
219    }
220}
221
222impl Default for Redactor {
223    fn default() -> Self {
224        Self::new(&[])
225    }
226}
227
#[cfg(test)]
mod tests {
    use super::*;

    /// Placeholder the redactor writes in place of every scrubbed value.
    const MARKER: &str = "[REDACTED]";

    // --- free-text pattern matching -------------------------------------

    #[test]
    fn redacts_api_keys() {
        let scrubbed = Redactor::default().redact("key is sk-abc123def456ghi789jkl012mno");
        assert!(scrubbed.contains(MARKER));
    }

    #[test]
    fn redacts_bearer_tokens() {
        let header = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U";
        let scrubbed = Redactor::default().redact(header);
        assert!(scrubbed.contains(MARKER));
        assert!(!scrubbed.contains("eyJhbGci"));
    }

    #[test]
    fn redacts_emails() {
        let scrubbed = Redactor::default().redact("contact user@example.com for help");
        assert!(scrubbed.contains(MARKER));
    }

    #[test]
    fn passes_through_clean_text() {
        // Nothing sensitive here, so the text must come back byte-for-byte.
        let clean = r#"{"ok": true, "title": "My App"}"#;
        let scrubbed = Redactor::default().redact(clean);
        assert_eq!(scrubbed, clean);
    }

    #[test]
    fn custom_patterns_work() {
        let patterns = ["secret_\\w+".to_string()];
        let redactor = Redactor::new(&patterns);
        let scrubbed = redactor.redact("found secret_project_alpha here");
        assert!(scrubbed.contains(MARKER));
    }

    // --- JSON key handling ------------------------------------------------

    #[test]
    fn redacts_json_sensitive_keys() {
        let doc = r#"{"api_key":"sk-test-12345","name":"John","token":"abc123"}"#;
        let scrubbed = Redactor::default().redact(doc);
        assert!(scrubbed.contains(MARKER));
        assert!(scrubbed.contains("John"));
        assert!(!scrubbed.contains("sk-test-12345"));
    }

    #[test]
    fn preserves_boolean_sensitive_keys() {
        let doc = r#"{"has_api_key":true,"api_key":"secret-value-here"}"#;
        let scrubbed = Redactor::default().redact(doc);
        let parsed: serde_json::Value = serde_json::from_str(&scrubbed).unwrap();

        // The presence flag survives; the actual secret does not.
        assert_eq!(parsed["has_api_key"], serde_json::Value::Bool(true));
        assert_eq!(parsed["api_key"], serde_json::Value::String(MARKER.into()));
    }

    #[test]
    fn redacts_nested_json_keys() {
        let doc = r#"{"config":{"llm":{"api_key":"sk-live-xxx","model":"gpt-4"}}}"#;
        let scrubbed = Redactor::default().redact(doc);
        assert!(scrubbed.contains(MARKER));
        assert!(scrubbed.contains("gpt-4"));
        assert!(!scrubbed.contains("sk-live-xxx"));
    }

    // --- vendor-specific token formats -------------------------------------

    #[test]
    fn redacts_github_tokens() {
        let scrubbed = Redactor::default().redact("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno");
        assert!(scrubbed.contains(MARKER));
    }

    #[test]
    fn redacts_stripe_keys() {
        let scrubbed = Redactor::default().redact("sk_test_ABCDEFGHIJKLMNOPQRSTUVWXYZab");
        assert!(scrubbed.contains(MARKER));
    }

    // --- fallible constructor ----------------------------------------------

    #[test]
    fn try_new_valid_patterns() {
        let built = Redactor::try_new(&["secret_\\w+".to_string()]);
        assert!(built.is_ok());
        let redactor = built.unwrap();
        let scrubbed = redactor.redact("found secret_alpha here");
        assert!(scrubbed.contains(MARKER));
    }

    #[test]
    fn try_new_invalid_pattern_returns_error() {
        let built = Redactor::try_new(&["[invalid".to_string()]);
        assert!(built.is_err());
    }

    #[test]
    fn try_new_empty_patterns() {
        let built = Redactor::try_new(&[]);
        assert!(built.is_ok());
    }
}