1use once_cell::sync::Lazy;
36use regex::Regex;
37
/// Placeholder text that `redact` writes in place of every matched secret.
const REPLACEMENT: &str = "[REDACTED]";
39
40#[derive(Debug, Clone, PartialEq, Eq)]
41pub struct RedactReport {
42 pub redacted: String,
43 pub hits: Vec<RedactHit>,
44}
45
46impl RedactReport {
47 pub fn is_clean(&self) -> bool {
48 self.hits.is_empty()
49 }
50
51 pub fn kinds(&self) -> Vec<String> {
53 let mut names: Vec<String> = self.hits.iter().map(|h| h.kind.clone()).collect();
54 names.sort();
55 names.dedup();
56 names
57 }
58}
59
/// Summary of the matches produced by a single pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RedactHit {
    /// Label of the pattern that matched (for example `"github_token"`).
    pub kind: String,
    /// Number of occurrences of that pattern that were replaced.
    pub count: usize,
}
65
/// A named secret detector: a label paired with the regex that recognises it.
struct Pattern {
    /// Label copied into `RedactHit::kind` when this pattern matches.
    kind: &'static str,
    /// Compiled expression used to count and replace occurrences.
    regex: Regex,
}
70
71static PATTERNS: Lazy<Vec<Pattern>> = Lazy::new(|| {
75 vec![
76 Pattern {
77 kind: "openai_api_key",
78 regex: Regex::new(r"sk-(?:proj-)?[A-Za-z0-9_\-]{20,}").unwrap(),
81 },
82 Pattern {
83 kind: "openai_pk_key",
84 regex: Regex::new(r"\bpk-[A-Za-z0-9_\-]{20,}").unwrap(),
85 },
86 Pattern {
87 kind: "anthropic_api_key",
88 regex: Regex::new(r"sk-ant-[A-Za-z0-9_\-]{20,}").unwrap(),
89 },
90 Pattern {
91 kind: "github_token",
92 regex: Regex::new(r"\bgh[pousr]_[A-Za-z0-9]{30,}").unwrap(),
93 },
94 Pattern {
95 kind: "slack_bot_token",
96 regex: Regex::new(r"\bxox[baprs]-[A-Za-z0-9-]{10,}").unwrap(),
97 },
98 Pattern {
99 kind: "google_api_key",
100 regex: Regex::new(r"\bAIza[0-9A-Za-z_\-]{35}").unwrap(),
101 },
102 Pattern {
103 kind: "aws_access_key_id",
104 regex: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").unwrap(),
105 },
106 Pattern {
110 kind: "bearer_token",
111 regex: Regex::new(r"(?i)Bearer\s+[A-Za-z0-9_\-\.=]{20,}").unwrap(),
112 },
113 Pattern {
117 kind: "jwt",
118 regex: Regex::new(
119 r"\beyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\b",
120 )
121 .unwrap(),
122 },
123 ]
124});
125
126pub fn redact(text: &str) -> RedactReport {
129 let mut current = text.to_string();
130 let mut hits = Vec::new();
131 for pattern in PATTERNS.iter() {
132 let count = pattern.regex.find_iter(¤t).count();
133 if count == 0 {
134 continue;
135 }
136 current = pattern
137 .regex
138 .replace_all(¤t, REPLACEMENT)
139 .into_owned();
140 hits.push(RedactHit {
141 kind: pattern.kind.to_string(),
142 count,
143 });
144 }
145 RedactReport {
146 redacted: current,
147 hits,
148 }
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn redact_returns_unchanged_text_when_clean() {
        let report = redact("hello world");
        assert_eq!(report.redacted, "hello world");
        assert!(report.is_clean());
        assert!(report.kinds().is_empty());
    }

    #[test]
    fn redact_strips_openai_key() {
        let raw = "use sk-abc1234567890DEFGHIJ for the call";
        let report = redact(raw);
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("sk-abc"));
        assert_eq!(report.kinds(), vec!["openai_api_key"]);
        assert_eq!(report.hits[0].count, 1);
    }

    #[test]
    fn redact_strips_anthropic_key_first_when_overlap() {
        // Both `sk-` patterns are able to match this key. Whichever one
        // fires, the key must be reported under exactly one kind and must
        // not survive in the output.
        let raw = "key=sk-ant-abc1234567890DEFGHIJ done";
        let report = redact(raw);
        assert_eq!(report.kinds().len(), 1, "{:?}", report.kinds());
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("sk-ant"));
    }

    #[test]
    fn redact_strips_github_token() {
        let raw = "token=ghp_abcdefghijklmnop1234567890ABCDEF rest";
        let report = redact(raw);
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("ghp_"));
        assert_eq!(report.kinds(), vec!["github_token"]);
    }

    #[test]
    fn redact_strips_bearer_token_keeping_surrounding_text() {
        let raw = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9 ok";
        let report = redact(raw);
        assert!(report.redacted.contains("Authorization:"));
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("eyJhbGc"));
        assert!(report.kinds().contains(&"bearer_token".to_string()));
    }

    #[test]
    fn redact_strips_slack_token() {
        let raw = "send to xoxb-12345678901-aBcDeFgHiJkLmN done";
        let report = redact(raw);
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("xoxb-"));
        assert_eq!(report.kinds(), vec!["slack_bot_token"]);
    }

    #[test]
    fn redact_strips_google_api_key() {
        let raw = "key=AIzaSy0123456789ABCDEF0123456789ABCDEF012 rest";
        let report = redact(raw);
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("AIzaSy"));
        assert_eq!(report.kinds(), vec!["google_api_key"]);
    }

    #[test]
    fn redact_strips_aws_access_key() {
        let raw = "AKIAIOSFODNN7EXAMPLE is the key";
        let report = redact(raw);
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("AKIA"));
        assert_eq!(report.kinds(), vec!["aws_access_key_id"]);
    }

    #[test]
    fn redact_strips_jwt_when_standalone() {
        let raw = "tok=eyJabc1234567890.eyJpYXQiOjE3MDA.signaturE12345 done";
        let report = redact(raw);
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("eyJabc"));
        assert!(report.kinds().contains(&"jwt".to_string()));
    }

    #[test]
    fn redact_records_repeat_count_for_same_pattern() {
        let raw = "first=sk-aaa1234567890ABCDEFGHIJ second=sk-bbb1234567890ABCDEFGHIJ";
        let report = redact(raw);
        assert_eq!(report.hits.len(), 1);
        assert_eq!(report.hits[0].kind, "openai_api_key");
        assert_eq!(report.hits[0].count, 2);
        // Both occurrences are replaced and the surrounding text is kept.
        assert_eq!(report.redacted, "first=[REDACTED] second=[REDACTED]");
    }

    #[test]
    fn redact_handles_multiple_kinds_in_one_text() {
        let raw = "use sk-abc1234567890ABCDEFGHIJ and ghp_abcdefghijklmnop1234567890ABCDEF";
        let report = redact(raw);
        let kinds = report.kinds();
        assert!(kinds.contains(&"openai_api_key".to_string()));
        assert!(kinds.contains(&"github_token".to_string()));
        assert!(report.redacted.matches(REPLACEMENT).count() >= 2);
        assert!(!report.redacted.contains("sk-abc"));
        assert!(!report.redacted.contains("ghp_"));
    }

    #[test]
    fn redact_does_not_match_short_obvious_non_secrets() {
        let raw = "id=sk-abc1 short=ghp_abc";
        let report = redact(raw);
        assert!(report.is_clean());
        assert_eq!(report.redacted, raw);
    }

    #[test]
    fn redact_does_not_corrupt_unicode() {
        let raw = "测试 sk-abc1234567890DEFGHIJ 完成";
        let report = redact(raw);
        assert!(report.redacted.contains("测试"));
        assert!(report.redacted.contains("完成"));
        assert!(report.redacted.contains(REPLACEMENT));
        assert!(!report.redacted.contains("sk-abc"));
    }

    #[test]
    fn kinds_sorted_and_deduped() {
        let report = RedactReport {
            redacted: "[REDACTED]".into(),
            hits: vec![
                RedactHit {
                    kind: "github_token".into(),
                    count: 1,
                },
                RedactHit {
                    kind: "github_token".into(),
                    count: 2,
                },
                RedactHit {
                    kind: "openai_api_key".into(),
                    count: 1,
                },
            ],
        };
        assert_eq!(report.kinds(), vec!["github_token", "openai_api_key"]);
    }
}