1use regex::RegexSet;
2
/// Regular expressions for secret-shaped text that is redacted in every input.
///
/// `Redactor` compiles this list twice: once as a `RegexSet` (used only to ask
/// "does anything match?") and once as individual `Regex` values that perform
/// the replacement. Replacements are applied in the order listed here.
const BUILTIN_PATTERNS: &[&str] = &[
    // `sk`, `pk` or `key` (case-insensitive), then `-` or `_`, then 20+ alphanumerics.
    r"(?i)\b(sk|pk|key)[-_][a-zA-Z0-9]{20,}\b",
    // The word `bearer` (case-insensitive), whitespace, then a 20+ character token.
    r"(?i)bearer\s+[a-zA-Z0-9\-_.~+/]{20,}",
    // `AKIA` followed by exactly 16 upper-case letters/digits
    // (presumably AWS access key IDs — confirm).
    r"\bAKIA[0-9A-Z]{16}\b",
    // Three dot-separated base64url-style segments, the first starting with `eyJ`
    // (the shape of the JWT used in the bearer-token test).
    r"\beyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\b",
    // Any run of 40 or more hexadecimal digits.
    r"\b[0-9a-fA-F]{40,}\b",
    // Email addresses.
    r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
    // Sixteen digits in four groups of four, optionally separated by `-` or a space
    // (card-number shaped).
    r"\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b",
    // `sk-` followed by 32+ alphanumerics.
    r"\bsk-[a-zA-Z0-9]{32,}\b",
    // `sk-ant-` followed by 20+ alphanumerics or hyphens.
    r"\bsk-ant-[a-zA-Z0-9\-]{20,}\b",
    // `ghp_`/`gho_`/`ghu_`/`ghs_`/`ghr_` followed by 36+ alphanumerics (GitHub tokens).
    r"\b(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}\b",
    // `sk_`/`pk_`/`rk_`, then `test_` or `live_`, then 20+ alphanumerics (Stripe keys).
    r"\b(sk|pk|rk)_(test|live)_[a-zA-Z0-9]{20,}\b",
];
27
/// Fragments of JSON object keys whose values must not appear in output.
///
/// Matching is case-insensitive and by substring: a key counts as sensitive
/// when its lower-cased form contains the lower-cased form of any entry. An
/// entry such as `"token"` therefore also matches keys like `"auth_token_id"`,
/// and `"api_key"` also matches `"has_api_key"`.
const SENSITIVE_JSON_KEYS: &[&str] = &[
    "api_key",
    "apiKey",
    "api-key",
    "secret",
    "password",
    "passwd",
    "token",
    "access_token",
    "refresh_token",
    "private_key",
    "privateKey",
    "secret_key",
    "secretKey",
    "authorization",
    "auth_token",
    "session_token",
    "cookie",
    "credentials",
    "ssn",
    "credit_card",
    "card_number",
];
51
52pub struct Redactor {
55 builtin_set: RegexSet,
56 builtin_compiled: Vec<regex::Regex>,
57 custom_set: Option<RegexSet>,
58 custom_compiled: Vec<regex::Regex>,
59}
60
61impl Redactor {
62 pub fn try_new(custom_patterns: &[String]) -> Result<Self, regex::Error> {
68 let builtin_set = RegexSet::new(BUILTIN_PATTERNS)?;
69 let builtin_compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
70 .iter()
71 .filter_map(|p| regex::Regex::new(p).ok())
72 .collect();
73
74 let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
75 (None, Vec::new())
76 } else {
77 let set = RegexSet::new(custom_patterns)?;
78 let compiled: Vec<regex::Regex> = custom_patterns
79 .iter()
80 .map(|p| regex::Regex::new(p))
81 .collect::<Result<Vec<_>, _>>()?;
82 (Some(set), compiled)
83 };
84
85 Ok(Self {
86 builtin_set,
87 builtin_compiled,
88 custom_set,
89 custom_compiled,
90 })
91 }
92
93 pub fn new(custom_patterns: &[String]) -> Self {
98 let (builtin_set, builtin_compiled) = match RegexSet::new(BUILTIN_PATTERNS) {
99 Ok(set) => {
100 let compiled: Vec<regex::Regex> = BUILTIN_PATTERNS
101 .iter()
102 .filter_map(|p| regex::Regex::new(p).ok())
103 .collect();
104 (set, compiled)
105 }
106 Err(e) => {
107 tracing::error!(
108 "BUG: built-in redaction patterns failed to compile: {e}. \
109 Redaction will be disabled."
110 );
111 let empty: Vec<String> = Vec::new();
114 let empty_set = RegexSet::new(&empty).unwrap_or_else(|_| unreachable!());
115 (empty_set, Vec::new())
116 }
117 };
118
119 let (custom_set, custom_compiled) = if custom_patterns.is_empty() {
120 (None, Vec::new())
121 } else {
122 match RegexSet::new(custom_patterns) {
123 Ok(set) => {
124 let compiled: Vec<regex::Regex> = custom_patterns
125 .iter()
126 .filter_map(|p| regex::Regex::new(p).ok())
127 .collect();
128 (Some(set), compiled)
129 }
130 Err(e) => {
131 tracing::warn!("Failed to compile custom redaction patterns: {e}");
132 (None, Vec::new())
133 }
134 }
135 };
136
137 Self {
138 builtin_set,
139 builtin_compiled,
140 custom_set,
141 custom_compiled,
142 }
143 }
144
145 #[must_use]
147 pub fn redact(&self, input: &str) -> String {
148 let mut output = self.redact_regex(input);
149 output = self.redact_json_keys(&output);
150 output
151 }
152
153 fn redact_regex(&self, input: &str) -> String {
154 let has_builtin = self.builtin_set.is_match(input);
155 let has_custom = self.custom_set.as_ref().is_some_and(|c| c.is_match(input));
156
157 if !has_builtin && !has_custom {
158 return input.to_string();
159 }
160
161 let mut output = input.to_string();
162
163 if has_builtin {
164 for re in &self.builtin_compiled {
165 output = re.replace_all(&output, "[REDACTED]").to_string();
166 }
167 }
168
169 if has_custom {
170 for re in &self.custom_compiled {
171 output = re.replace_all(&output, "[REDACTED]").to_string();
172 }
173 }
174
175 output
176 }
177
178 fn redact_json_keys(&self, input: &str) -> String {
179 if let Ok(value) = serde_json::from_str::<serde_json::Value>(input) {
180 if !json_has_sensitive_keys(&value) {
181 return input.to_string();
182 }
183 let redacted = redact_json_value(&value);
184 serde_json::to_string(&redacted).unwrap_or_else(|_| input.to_string())
185 } else {
186 input.to_string()
187 }
188 }
189}
190
191fn json_has_sensitive_keys(value: &serde_json::Value) -> bool {
192 match value {
193 serde_json::Value::Object(map) => {
194 for (key, val) in map {
195 let lower_key = key.to_lowercase();
196 if SENSITIVE_JSON_KEYS
197 .iter()
198 .any(|k| lower_key.contains(&k.to_lowercase()))
199 {
200 return true;
201 }
202 if json_has_sensitive_keys(val) {
203 return true;
204 }
205 }
206 false
207 }
208 serde_json::Value::Array(arr) => arr.iter().any(json_has_sensitive_keys),
209 _ => false,
210 }
211}
212
213fn redact_json_value(value: &serde_json::Value) -> serde_json::Value {
214 match value {
215 serde_json::Value::Object(map) => {
216 let mut new_map = serde_json::Map::new();
217 for (key, val) in map {
218 let lower_key = key.to_lowercase();
219 if SENSITIVE_JSON_KEYS
220 .iter()
221 .any(|k| lower_key.contains(&k.to_lowercase()))
222 {
223 if val.is_string() || val.is_number() {
224 new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
225 } else if val.is_boolean() {
226 new_map.insert(key.clone(), val.clone());
228 } else {
229 new_map.insert(key.clone(), serde_json::Value::String("[REDACTED]".into()));
230 }
231 } else {
232 new_map.insert(key.clone(), redact_json_value(val));
233 }
234 }
235 serde_json::Value::Object(new_map)
236 }
237 serde_json::Value::Array(arr) => {
238 serde_json::Value::Array(arr.iter().map(redact_json_value).collect())
239 }
240 other => other.clone(),
241 }
242}
243
244impl Default for Redactor {
245 fn default() -> Self {
246 Self::new(&[])
247 }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Marker that every redaction leaves in the output.
    const MARK: &str = "[REDACTED]";

    /// A generic `sk-…` key embedded in plain text is replaced.
    #[test]
    fn redacts_api_keys() {
        let redacted = Redactor::default().redact("key is sk-abc123def456ghi789jkl012mno");
        assert!(redacted.contains(MARK));
    }

    /// A bearer token is replaced and no part of its first segment survives.
    #[test]
    fn redacts_bearer_tokens() {
        let header = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U";
        let redacted = Redactor::default().redact(header);
        assert!(redacted.contains(MARK));
        assert!(!redacted.contains("eyJhbGci"));
    }

    /// An email address in running text is replaced.
    #[test]
    fn redacts_emails() {
        let redacted = Redactor::default().redact("contact user@example.com for help");
        assert!(redacted.contains(MARK));
    }

    /// JSON without secrets or sensitive keys comes back byte-for-byte.
    #[test]
    fn passes_through_clean_text() {
        let clean = r#"{"ok": true, "title": "My App"}"#;
        let redacted = Redactor::default().redact(clean);
        assert_eq!(redacted, clean);
    }

    /// A caller-supplied pattern passed to `new` is applied.
    #[test]
    fn custom_patterns_work() {
        let patterns = ["secret_\\w+".to_string()];
        let redacted = Redactor::new(&patterns).redact("found secret_project_alpha here");
        assert!(redacted.contains(MARK));
    }

    /// Values under sensitive keys are replaced; other values are kept.
    #[test]
    fn redacts_json_sensitive_keys() {
        let doc = r#"{"api_key":"sk-test-12345","name":"John","token":"abc123"}"#;
        let redacted = Redactor::default().redact(doc);
        assert!(redacted.contains(MARK));
        assert!(redacted.contains("John"));
        assert!(!redacted.contains("sk-test-12345"));
    }

    /// A boolean under a sensitive-looking key is kept; the real secret is not.
    #[test]
    fn preserves_boolean_sensitive_keys() {
        let doc = r#"{"has_api_key":true,"api_key":"secret-value-here"}"#;
        let redacted = Redactor::default().redact(doc);
        let parsed: serde_json::Value = serde_json::from_str(&redacted).unwrap();
        assert_eq!(parsed["has_api_key"], serde_json::Value::Bool(true));
        assert_eq!(parsed["api_key"], serde_json::Value::String(MARK.into()));
    }

    /// Sensitive keys are found inside nested objects.
    #[test]
    fn redacts_nested_json_keys() {
        let doc = r#"{"config":{"llm":{"api_key":"sk-live-xxx","model":"gpt-4"}}}"#;
        let redacted = Redactor::default().redact(doc);
        assert!(redacted.contains(MARK));
        assert!(redacted.contains("gpt-4"));
        assert!(!redacted.contains("sk-live-xxx"));
    }

    /// A `ghp_…` token is replaced.
    #[test]
    fn redacts_github_tokens() {
        let redacted =
            Redactor::default().redact("ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno");
        assert!(redacted.contains(MARK));
    }

    /// An `sk_test_…` key is replaced.
    #[test]
    fn redacts_stripe_keys() {
        let redacted = Redactor::default().redact("sk_test_ABCDEFGHIJKLMNOPQRSTUVWXYZab");
        assert!(redacted.contains(MARK));
    }

    /// `try_new` accepts a valid custom pattern and the pattern is applied.
    #[test]
    fn try_new_valid_patterns() {
        let built = Redactor::try_new(&["secret_\\w+".to_string()]);
        assert!(built.is_ok());
        let redactor = built.unwrap();
        let redacted = redactor.redact("found secret_alpha here");
        assert!(redacted.contains(MARK));
    }

    /// `try_new` reports a pattern that does not compile.
    #[test]
    fn try_new_invalid_pattern_returns_error() {
        let built = Redactor::try_new(&["[invalid".to_string()]);
        assert!(built.is_err());
    }

    /// `try_new` succeeds when no custom patterns are given.
    #[test]
    fn try_new_empty_patterns() {
        let built = Redactor::try_new(&[]);
        assert!(built.is_ok());
    }
}