1use regex::RegexSet;
2
/// Regex patterns that every `Redactor` applies; each match is replaced with
/// the literal text `[REDACTED]`.
///
/// The patterns are compiled both as one `RegexSet` (used as a "does anything
/// match?" pre-check) and as individual regexes that are applied one after
/// another in the order listed here.
const BUILTIN_PATTERNS: &[&str] = &[
    // Case-insensitive `sk` / `pk` / `key`, then `-` or `_`, then 20+ alphanumerics.
    r"(?i)\b(sk|pk|key)[-_][a-zA-Z0-9]{20,}\b",
    // Case-insensitive `bearer`, whitespace, then a 20+ character token.
    r"(?i)bearer\s+[a-zA-Z0-9\-_.~+/]{20,}",
    // `AKIA` followed by 16 uppercase alphanumerics (presumably AWS access key IDs — confirm).
    r"\bAKIA[0-9A-Z]{16}\b",
    // Three dot-separated base64url-looking segments, the first starting with `eyJ`
    // (the shape of a JWT).
    r"\beyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\b",
    // Runs of 40 or more hexadecimal characters.
    r"\b[0-9a-fA-F]{40,}\b",
    // Email addresses.
    r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
    // Four groups of four digits, optionally separated by `-` or a space
    // (the shape of a 16-digit card number).
    r"\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b",
    // `sk-` followed by 32+ alphanumerics.
    // NOTE(review): anything this matches also appears to match the first pattern above.
    r"\bsk-[a-zA-Z0-9]{32,}\b",
    // `sk-ant-` followed by 20+ alphanumerics or hyphens
    // (presumably Anthropic API keys — confirm).
    r"\bsk-ant-[a-zA-Z0-9\-]{20,}\b",
    // GitHub-style tokens: `ghp_` / `gho_` / `ghu_` / `ghs_` / `ghr_` plus 36+ alphanumerics.
    r"\b(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}\b",
    // Stripe-style keys: `sk_` / `pk_` / `rk_`, then `test_` or `live_`, then 20+ alphanumerics.
    r"\b(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{20,}\b",
];
27
/// Names of JSON object keys whose values are treated as sensitive.
///
/// Matching is a case-insensitive *substring* test: both the JSON key and
/// each entry below are lowercased, and a key is sensitive when it contains
/// any entry (so `has_api_key` and `X-Api-Key` both match). Because of that,
/// several entries are already covered by a shorter one (for example every
/// `*_token` entry by `token`).
const SENSITIVE_JSON_KEYS: &[&str] = &[
    "api_key",
    "apiKey",
    "api-key",
    "secret",
    "password",
    "passwd",
    "token",
    "access_token",
    "refresh_token",
    "private_key",
    "privateKey",
    "secret_key",
    "secretKey",
    "authorization",
    "auth_token",
    "session_token",
    "cookie",
    "credentials",
    "ssn",
    "credit_card",
    "card_number",
];
51
52pub struct Redactor {
55 builtin_set: RegexSet,
56 builtin_compiled: Vec<regex::Regex>,
57 custom_set: Option<RegexSet>,
58 custom_compiled: Vec<regex::Regex>,
59}
60
61impl Redactor {
62 pub fn try_new(custom_patterns: &[String]) -> Result<Self, regex::Error> {
68 let builtin_set = RegexSet::new(BUILTIN_PATTERNS)?;
69 let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
70 .iter()
71 .filter_map(|p| regex::Regex::new(p).ok())
72 .collect();
73
74 let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
75 (None, Vec::new())
76 } else {
77 let set = RegexSet::new(custom_patterns)?;
78 let compiled: Vec<regex::Regex> = custom_patterns
79 .iter()
80 .map(|p| regex::Regex::new(p))
81 .collect::<Result<Vec<_>, _>>()?;
82 (Some(set), compiled)
83 };
84
85 Ok(Self {
86 builtin_set,
87 builtin_compiled,
88 custom_set,
89 custom_compiled,
90 })
91 }
92
93 pub fn new(custom_patterns: &[String]) -> Self {
99 let builtin_set =
100 RegexSet::new(BUILTIN_PATTERNS).expect("builtin redaction patterns must compile");
101 let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
102 .iter()
103 .filter_map(|p| regex::Regex::new(p).ok())
104 .collect();
105
106 let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
107 (None, Vec::new())
108 } else {
109 match RegexSet::new(custom_patterns) {
110 Ok(set) => {
111 let compiled: Vec<regex::Regex> = custom_patterns
112 .iter()
113 .filter_map(|p| regex::Regex::new(p).ok())
114 .collect();
115 (Some(set), compiled)
116 }
117 Err(e) => {
118 tracing::warn!("Failed to compile custom redaction patterns: {e}");
119 (None, Vec::new())
120 }
121 }
122 };
123
124 Self {
125 builtin_set,
126 builtin_compiled,
127 custom_set,
128 custom_compiled,
129 }
130 }
131
132 #[must_use]
134 pub fn redact(&self, input: &str) -> String {
135 let mut output = self.redact_regex(input);
136 output = self.redact_json_keys(&output);
137 output
138 }
139
140 fn redact_regex(&self, input: &str) -> String {
141 let has_builtin = self.builtin_set.is_match(input);
142 let has_custom = self.custom_set.as_ref().is_some_and(|c| c.is_match(input));
143
144 if !has_builtin && !has_custom {
145 return input.to_string();
146 }
147
148 let mut output = input.to_string();
149
150 if has_builtin {
151 for re in &self.builtin_compiled {
152 output = re.replace_all(&output, "[REDACTED]").to_string();
153 }
154 }
155
156 if has_custom {
157 for re in &self.custom_compiled {
158 output = re.replace_all(&output, "[REDACTED]").to_string();
159 }
160 }
161
162 output
163 }
164
165 fn redact_json_keys(&self, input: &str) -> String {
166 if let Ok(value) = serde_json::from_str::<serde_json::Value>(input) {
167 if !json_has_sensitive_keys(&value) {
168 return input.to_string();
169 }
170 let redacted = redact_json_value(&value);
171 serde_json::to_string(&redacted).unwrap_or_else(|_| input.to_string())
172 } else {
173 input.to_string()
174 }
175 }
176}
177
178fn json_has_sensitive_keys(value: &serde_json::Value) -> bool {
179 match value {
180 serde_json::Value::Object(map) => {
181 for (key, val) in map {
182 let lower_key = key.to_lowercase();
183 if SENSITIVE_JSON_KEYS
184 .iter()
185 .any(|k| lower_key.contains(&k.to_lowercase()))
186 {
187 return true;
188 }
189 if json_has_sensitive_keys(val) {
190 return true;
191 }
192 }
193 false
194 }
195 serde_json::Value::Array(arr) => arr.iter().any(json_has_sensitive_keys),
196 _ => false,
197 }
198}
199
200fn redact_json_value(value: &serde_json::Value) -> serde_json::Value {
201 match value {
202 serde_json::Value::Object(map) => {
203 let mut new_map = serde_json::Map::new();
204 for (key, val) in map {
205 let lower_key = key.to_lowercase();
206 if SENSITIVE_JSON_KEYS
207 .iter()
208 .any(|k| lower_key.contains(&k.to_lowercase()))
209 {
210 if val.is_string() || val.is_number() {
211 new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
212 } else if val.is_boolean() {
213 new_map.insert(key.clone(), val.clone());
215 } else {
216 new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
217 }
218 } else {
219 new_map.insert(key.clone(), redact_json_value(val));
220 }
221 }
222 serde_json::Value::Object(new_map)
223 }
224 serde_json::Value::Array(arr) => {
225 serde_json::Value::Array(arr.iter().map(redact_json_value).collect())
226 }
227 other => other.clone(),
228 }
229}
230
231impl Default for Redactor {
232 fn default() -> Self {
233 Self::new(&[])
234 }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Placeholder the redactor is expected to emit.
    const MARKER: &str = "[REDACTED]";

    /// Runs `input` through a redactor that only has the built-in patterns.
    fn scrub(input: &str) -> String {
        Redactor::default().redact(input)
    }

    #[test]
    fn redacts_api_keys() {
        let masked = scrub("key is sk-abc123def456ghi789jkl012mno");
        assert!(masked.contains(MARKER));
    }

    #[test]
    fn redacts_bearer_tokens() {
        let masked = scrub(
            "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U",
        );
        assert!(masked.contains(MARKER));
        assert!(!masked.contains("eyJhbGci"));
    }

    #[test]
    fn redacts_emails() {
        let masked = scrub("contact user@example.com for help");
        assert!(masked.contains(MARKER));
    }

    #[test]
    fn passes_through_clean_text() {
        let clean = r#"{"ok": true, "title": "My App"}"#;
        assert_eq!(scrub(clean), clean);
    }

    #[test]
    fn custom_patterns_work() {
        let patterns = ["secret_\\w+".to_string()];
        let redactor = Redactor::new(&patterns);
        let masked = redactor.redact("found secret_project_alpha here");
        assert!(masked.contains(MARKER));
    }

    #[test]
    fn redacts_json_sensitive_keys() {
        let masked = scrub(r#"{"api_key":"sk-test-12345","name":"John","token":"abc123"}"#);
        assert!(masked.contains(MARKER));
        assert!(masked.contains("John"));
        assert!(!masked.contains("sk-test-12345"));
    }

    #[test]
    fn preserves_boolean_sensitive_keys() {
        let masked = scrub(r#"{"has_api_key":true,"api_key":"secret-value-here"}"#);
        let doc: serde_json::Value = serde_json::from_str(&masked).unwrap();
        assert_eq!(doc["has_api_key"], serde_json::Value::Bool(true));
        assert_eq!(doc["api_key"], serde_json::Value::String(MARKER.into()));
    }

    #[test]
    fn redacts_nested_json_keys() {
        let masked = scrub(r#"{"config":{"llm":{"api_key":"sk-live-xxx","model":"gpt-4"}}}"#);
        assert!(masked.contains(MARKER));
        assert!(masked.contains("gpt-4"));
        assert!(!masked.contains("sk-live-xxx"));
    }

    #[test]
    fn redacts_github_tokens() {
        let masked = scrub("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno");
        assert!(masked.contains(MARKER));
    }

    #[test]
    fn redacts_stripe_keys() {
        let masked = scrub("sk_test_ABCDEFGHIJKLMNOPQRSTUVWXYZab");
        assert!(masked.contains(MARKER));
    }

    #[test]
    fn try_new_valid_patterns() {
        let patterns = ["secret_\\w+".to_string()];
        let built = Redactor::try_new(&patterns);
        assert!(built.is_ok());
        let masked = built.unwrap().redact("found secret_alpha here");
        assert!(masked.contains(MARKER));
    }

    #[test]
    fn try_new_invalid_pattern_returns_error() {
        let patterns = ["[invalid".to_string()];
        assert!(Redactor::try_new(&patterns).is_err());
    }

    #[test]
    fn try_new_empty_patterns() {
        assert!(Redactor::try_new(&[]).is_ok());
    }
}