1use regex::RegexSet;
2
/// Regular expressions for secret-looking substrings that are redacted by every
/// `Redactor`, independent of any custom patterns.
///
/// Each entry is compiled as-is; every match is replaced with `[REDACTED]`.
const BUILTIN_PATTERNS: &[&str] = &[
    // Generic prefixed keys: `sk`, `pk` or `key` (case-insensitive), then `-` or `_`,
    // then at least 20 alphanumerics.
    r"(?i)\b(sk|pk|key)[-_][a-zA-Z0-9]{20,}\b",
    // `Bearer <token>` (case-insensitive) where the token is 20+ characters long.
    r"(?i)bearer\s+[a-zA-Z0-9\-_.~+/]{20,}",
    // `AKIA` followed by exactly 16 uppercase alphanumerics
    // (presumably AWS access key IDs; confirm against the intended provider).
    r"\bAKIA[0-9A-Z]{16}\b",
    // Three dot-separated segments of 10+ base64url-style characters, the first
    // starting with `eyJ` (the shape of a JWT).
    r"\beyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\b",
    // Runs of 40 or more hexadecimal digits.
    r"\b[0-9a-fA-F]{40,}\b",
    // E-mail addresses.
    r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
    // Sixteen digits in four groups of four, optionally separated by `-` or a space
    // (payment-card-like numbers).
    r"\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b",
    // `sk-` followed by 32+ alphanumerics.
    r"\bsk-[a-zA-Z0-9]{32,}\b",
    // `sk-ant-` followed by 20+ alphanumerics or hyphens.
    r"\bsk-ant-[a-zA-Z0-9\-]{20,}\b",
    // `ghp_`/`gho_`/`ghu_`/`ghs_`/`ghr_` followed by 36+ alphanumerics (GitHub-style tokens).
    r"\b(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}\b",
    // `sk_`/`pk_`/`rk_`, then `test_` or `live_`, then 20+ alphanumerics (Stripe-style keys).
    r"\b(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{20,}\b",
];
27
/// Names that mark a JSON object key as sensitive.
///
/// Matching is case-insensitive and by substring: a key is treated as sensitive
/// when its lowercased form contains the lowercased form of any entry below
/// (so `has_api_key` matches `api_key`).
const SENSITIVE_JSON_KEYS: &[&str] = &[
    "api_key",
    "apiKey",
    "api-key",
    "secret",
    "password",
    "passwd",
    "token",
    "access_token",
    "refresh_token",
    "private_key",
    "privateKey",
    "secret_key",
    "secretKey",
    "authorization",
    "auth_token",
    "session_token",
    "cookie",
    "credentials",
    "ssn",
    "credit_card",
    "card_number",
];
51
52pub struct Redactor {
55 builtin_set: RegexSet,
56 builtin_compiled: Vec<regex::Regex>,
57 custom_set: Option<RegexSet>,
58 custom_compiled: Vec<regex::Regex>,
59}
60
61impl Redactor {
62 pub fn try_new(custom_patterns: &[String]) -> Result<Self, regex::Error> {
64 let builtin_set = RegexSet::new(BUILTIN_PATTERNS)?;
65 let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
66 .iter()
67 .filter_map(|p| regex::Regex::new(p).ok())
68 .collect();
69
70 let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
71 (None, Vec::new())
72 } else {
73 let set = RegexSet::new(custom_patterns)?;
74 let compiled: Vec<regex::Regex> = custom_patterns
75 .iter()
76 .map(|p| regex::Regex::new(p))
77 .collect::<Result<Vec<_>, _>>()?;
78 (Some(set), compiled)
79 };
80
81 Ok(Self {
82 builtin_set,
83 builtin_compiled,
84 custom_set,
85 custom_compiled,
86 })
87 }
88
89 pub fn new(custom_patterns: &[String]) -> Self {
91 let builtin_set =
92 RegexSet::new(BUILTIN_PATTERNS).expect("builtin redaction patterns must compile");
93 let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
94 .iter()
95 .filter_map(|p| regex::Regex::new(p).ok())
96 .collect();
97
98 let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
99 (None, Vec::new())
100 } else {
101 match RegexSet::new(custom_patterns) {
102 Ok(set) => {
103 let compiled: Vec<regex::Regex> = custom_patterns
104 .iter()
105 .filter_map(|p| regex::Regex::new(p).ok())
106 .collect();
107 (Some(set), compiled)
108 }
109 Err(e) => {
110 tracing::warn!("Failed to compile custom redaction patterns: {e}");
111 (None, Vec::new())
112 }
113 }
114 };
115
116 Self {
117 builtin_set,
118 builtin_compiled,
119 custom_set,
120 custom_compiled,
121 }
122 }
123
124 pub fn redact(&self, input: &str) -> String {
126 let mut output = self.redact_regex(input);
127 output = self.redact_json_keys(&output);
128 output
129 }
130
131 fn redact_regex(&self, input: &str) -> String {
132 let has_builtin = self.builtin_set.is_match(input);
133 let has_custom = self.custom_set.as_ref().is_some_and(|c| c.is_match(input));
134
135 if !has_builtin && !has_custom {
136 return input.to_string();
137 }
138
139 let mut output = input.to_string();
140
141 if has_builtin {
142 for re in &self.builtin_compiled {
143 output = re.replace_all(&output, "[REDACTED]").to_string();
144 }
145 }
146
147 if has_custom {
148 for re in &self.custom_compiled {
149 output = re.replace_all(&output, "[REDACTED]").to_string();
150 }
151 }
152
153 output
154 }
155
156 fn redact_json_keys(&self, input: &str) -> String {
157 if let Ok(value) = serde_json::from_str::<serde_json::Value>(input) {
158 if !json_has_sensitive_keys(&value) {
159 return input.to_string();
160 }
161 let redacted = redact_json_value(&value);
162 serde_json::to_string(&redacted).unwrap_or_else(|_| input.to_string())
163 } else {
164 input.to_string()
165 }
166 }
167}
168
169fn json_has_sensitive_keys(value: &serde_json::Value) -> bool {
170 match value {
171 serde_json::Value::Object(map) => {
172 for (key, val) in map {
173 let lower_key = key.to_lowercase();
174 if SENSITIVE_JSON_KEYS
175 .iter()
176 .any(|k| lower_key.contains(&k.to_lowercase()))
177 {
178 return true;
179 }
180 if json_has_sensitive_keys(val) {
181 return true;
182 }
183 }
184 false
185 }
186 serde_json::Value::Array(arr) => arr.iter().any(json_has_sensitive_keys),
187 _ => false,
188 }
189}
190
191fn redact_json_value(value: &serde_json::Value) -> serde_json::Value {
192 match value {
193 serde_json::Value::Object(map) => {
194 let mut new_map = serde_json::Map::new();
195 for (key, val) in map {
196 let lower_key = key.to_lowercase();
197 if SENSITIVE_JSON_KEYS
198 .iter()
199 .any(|k| lower_key.contains(&k.to_lowercase()))
200 {
201 if val.is_string() || val.is_number() {
202 new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
203 } else if val.is_boolean() {
204 new_map.insert(key.clone(), val.clone());
206 } else {
207 new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
208 }
209 } else {
210 new_map.insert(key.clone(), redact_json_value(val));
211 }
212 }
213 serde_json::Value::Object(new_map)
214 }
215 serde_json::Value::Array(arr) => {
216 serde_json::Value::Array(arr.iter().map(redact_json_value).collect())
217 }
218 other => other.clone(),
219 }
220}
221
222impl Default for Redactor {
223 fn default() -> Self {
224 Self::new(&[])
225 }
226}
227
#[cfg(test)]
mod tests {
    use super::*;

    // Each redaction test checks both that the marker appears and that the
    // secret itself is gone; the marker alone would not prove removal.

    #[test]
    fn redacts_api_keys() {
        let r = Redactor::default();
        let output = r.redact("key is sk-abc123def456ghi789jkl012mno");
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("sk-abc123def456ghi789jkl012mno"));
    }

    #[test]
    fn redacts_bearer_tokens() {
        let r = Redactor::default();
        let input = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U";
        let output = r.redact(input);
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("eyJhbGci"));
    }

    #[test]
    fn redacts_emails() {
        let r = Redactor::default();
        let output = r.redact("contact user@example.com for help");
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("user@example.com"));
    }

    #[test]
    fn passes_through_clean_text() {
        let r = Redactor::default();
        let input = r#"{"ok": true, "title": "My App"}"#;
        assert_eq!(r.redact(input), input);
    }

    #[test]
    fn custom_patterns_work() {
        let r = Redactor::new(&["secret_\\w+".to_string()]);
        let output = r.redact("found secret_project_alpha here");
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret_project_alpha"));
    }

    #[test]
    fn redacts_json_sensitive_keys() {
        let r = Redactor::default();
        let input = r#"{"api_key":"sk-test-12345","name":"John","token":"abc123"}"#;
        let output = r.redact(input);
        assert!(output.contains("[REDACTED]"));
        assert!(output.contains("John"));
        assert!(!output.contains("sk-test-12345"));
        assert!(!output.contains("abc123"));
    }

    #[test]
    fn preserves_boolean_sensitive_keys() {
        let r = Redactor::default();
        let input = r#"{"has_api_key":true,"api_key":"secret-value-here"}"#;
        let output = r.redact(input);
        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
        assert_eq!(parsed["has_api_key"], serde_json::Value::Bool(true));
        assert_eq!(
            parsed["api_key"],
            serde_json::Value::String("[REDACTED]".into())
        );
    }

    #[test]
    fn redacts_nested_json_keys() {
        let r = Redactor::default();
        let input = r#"{"config":{"llm":{"api_key":"sk-live-xxx","model":"gpt-4"}}}"#;
        let output = r.redact(input);
        assert!(output.contains("[REDACTED]"));
        assert!(output.contains("gpt-4"));
        assert!(!output.contains("sk-live-xxx"));
    }

    #[test]
    fn redacts_github_tokens() {
        let r = Redactor::default();
        let output = r.redact("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno");
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("ghp_"));
    }

    #[test]
    fn redacts_stripe_keys() {
        let r = Redactor::default();
        let output = r.redact("sk_test_ABCDEFGHIJKLMNOPQRSTUVWXYZab");
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("sk_test_"));
    }

    #[test]
    fn try_new_valid_patterns() {
        let r = Redactor::try_new(&["secret_\\w+".to_string()]);
        assert!(r.is_ok());
        let r = r.unwrap();
        let output = r.redact("found secret_alpha here");
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret_alpha"));
    }

    #[test]
    fn try_new_invalid_pattern_returns_error() {
        let r = Redactor::try_new(&["[invalid".to_string()]);
        assert!(r.is_err());
    }

    #[test]
    fn try_new_empty_patterns() {
        let r = Redactor::try_new(&[]);
        assert!(r.is_ok());
    }
}