1use std::ops::Range;
47
48use aho_corasick::AhoCorasick;
49use regex::Regex;
50
/// Action taken when a [`LeakPattern`] matches scanned content.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LeakAction {
    /// Reject the content: a match sets `should_block` on the scan result.
    Block,
    /// Replace the matched span with a `[REDACTED]` marker.
    Redact,
    /// Record the match only; `scan_and_clean` logs it as a warning.
    Warn,
}

impl std::fmt::Display for LeakAction {
    /// Writes the lowercase action name (`block`, `redact`, `warn`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            LeakAction::Block => "block",
            LeakAction::Redact => "redact",
            LeakAction::Warn => "warn",
        };
        f.write_str(label)
    }
}
71
/// Severity of a detected leak.
///
/// Variants are declared from least to most severe, so the derived ordering
/// satisfies `Low < Medium < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum LeakSeverity {
    Low,
    Medium,
    High,
    Critical,
}

impl std::fmt::Display for LeakSeverity {
    /// Writes the lowercase severity name (`low`, `medium`, `high`, `critical`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            LeakSeverity::Low => "low",
            LeakSeverity::Medium => "medium",
            LeakSeverity::High => "high",
            LeakSeverity::Critical => "critical",
        };
        f.write_str(label)
    }
}
91
92#[derive(Debug, Clone)]
94pub struct LeakPattern {
95 pub name: String,
96 pub regex: Regex,
97 pub severity: LeakSeverity,
98 pub action: LeakAction,
99}
100
101#[derive(Debug, Clone)]
103pub struct LeakMatch {
104 pub pattern_name: String,
105 pub severity: LeakSeverity,
106 pub action: LeakAction,
107 pub location: Range<usize>,
109 pub masked_preview: String,
111}
112
113#[derive(Debug)]
115pub struct LeakScanResult {
116 pub matches: Vec<LeakMatch>,
118 pub should_block: bool,
120 pub redacted_content: Option<String>,
122}
123
124impl LeakScanResult {
125 pub fn is_clean(&self) -> bool {
127 self.matches.is_empty()
128 }
129
130 pub fn max_severity(&self) -> Option<LeakSeverity> {
132 self.matches.iter().map(|m| m.severity).max()
133 }
134}
135
136#[derive(Debug, Clone, thiserror::Error)]
138pub enum LeakDetectionError {
139 #[error("Secret leak blocked: pattern '{pattern}' matched '{preview}'")]
140 SecretLeakBlocked { pattern: String, preview: String },
141}
142
143pub struct LeakDetector {
148 patterns: Vec<LeakPattern>,
149 prefix_matcher: Option<AhoCorasick>,
151 known_prefixes: Vec<(String, usize)>, }
153
154impl LeakDetector {
155 pub fn new() -> Self {
157 Self::with_patterns(default_patterns())
158 }
159
160 pub fn with_patterns(patterns: Vec<LeakPattern>) -> Self {
162 let mut prefixes = Vec::new();
164 for (idx, pattern) in patterns.iter().enumerate() {
165 if let Some(prefix) = extract_literal_prefix(pattern.regex.as_str()) {
166 if prefix.len() >= 3 {
167 prefixes.push((prefix, idx));
168 }
169 }
170 }
171
172 let prefix_matcher = if !prefixes.is_empty() {
173 let prefix_strings: Vec<&str> = prefixes.iter().map(|(s, _)| s.as_str()).collect();
174 AhoCorasick::builder()
175 .ascii_case_insensitive(false)
176 .build(&prefix_strings)
177 .ok()
178 } else {
179 None
180 };
181
182 Self {
183 patterns,
184 prefix_matcher,
185 known_prefixes: prefixes,
186 }
187 }
188
189 pub fn scan(&self, content: &str) -> LeakScanResult {
191 let mut matches = Vec::new();
192 let mut should_block = false;
193 let mut redact_ranges = Vec::new();
194
195 let candidate_indices: Vec<usize> = if let Some(ref matcher) = self.prefix_matcher {
197 let mut indices = Vec::new();
198 for mat in matcher.find_iter(content) {
199 let pattern_idx = self.known_prefixes[mat.pattern().as_usize()].1;
200 if !indices.contains(&pattern_idx) {
201 indices.push(pattern_idx);
202 }
203 }
204 for (idx, _) in self.patterns.iter().enumerate() {
206 if !self.known_prefixes.iter().any(|(_, i)| *i == idx) && !indices.contains(&idx) {
207 indices.push(idx);
208 }
209 }
210 indices
211 } else {
212 (0..self.patterns.len()).collect()
213 };
214
215 for idx in candidate_indices {
217 let pattern = &self.patterns[idx];
218 for mat in pattern.regex.find_iter(content) {
219 let matched_text = mat.as_str();
220 let location = mat.start()..mat.end();
221
222 let leak_match = LeakMatch {
223 pattern_name: pattern.name.clone(),
224 severity: pattern.severity,
225 action: pattern.action,
226 location: location.clone(),
227 masked_preview: mask_secret(matched_text),
228 };
229
230 if pattern.action == LeakAction::Block {
231 should_block = true;
232 }
233
234 if pattern.action == LeakAction::Redact {
235 redact_ranges.push(location);
236 }
237
238 matches.push(leak_match);
239 }
240 }
241
242 matches.sort_by_key(|m| m.location.start);
244 redact_ranges.sort_by_key(|r| r.start);
245
246 let redacted_content = if !redact_ranges.is_empty() {
248 Some(apply_redactions(content, &redact_ranges))
249 } else {
250 None
251 };
252
253 LeakScanResult {
254 matches,
255 should_block,
256 redacted_content,
257 }
258 }
259
260 pub fn scan_and_clean(&self, content: &str) -> Result<String, LeakDetectionError> {
264 let result = self.scan(content);
265
266 if result.should_block {
267 let blocking_match = result
268 .matches
269 .iter()
270 .find(|m| m.action == LeakAction::Block);
271 return Err(LeakDetectionError::SecretLeakBlocked {
272 pattern: blocking_match
273 .map(|m| m.pattern_name.clone())
274 .unwrap_or_default(),
275 preview: blocking_match
276 .map(|m| m.masked_preview.clone())
277 .unwrap_or_default(),
278 });
279 }
280
281 for m in &result.matches {
283 if m.action == LeakAction::Warn {
284 tracing::warn!(
285 pattern = %m.pattern_name,
286 severity = %m.severity,
287 preview = %m.masked_preview,
288 "Potential secret leak detected (warning only)"
289 );
290 }
291 }
292
293 Ok(result
295 .redacted_content
296 .unwrap_or_else(|| content.to_string()))
297 }
298
299 pub fn scan_http_request(
306 &self,
307 url: &str,
308 headers: &[(String, String)],
309 body: Option<&[u8]>,
310 ) -> Result<(), LeakDetectionError> {
311 self.scan_and_clean(url)?;
313
314 for (name, value) in headers {
316 self.scan_and_clean(value).map_err(|e| {
317 LeakDetectionError::SecretLeakBlocked {
318 pattern: format!("header:{}", name),
319 preview: e.to_string(),
320 }
321 })?;
322 }
323
324 if let Some(body_bytes) = body {
327 let body_str = String::from_utf8_lossy(body_bytes);
328 self.scan_and_clean(&body_str)?;
329 }
330
331 Ok(())
332 }
333
334 pub fn add_pattern(&mut self, pattern: LeakPattern) {
336 self.patterns.push(pattern);
337 }
339
340 pub fn pattern_count(&self) -> usize {
342 self.patterns.len()
343 }
344}
345
346impl Default for LeakDetector {
347 fn default() -> Self {
348 Self::new()
349 }
350}
351
/// Produces a log-safe preview of a matched secret.
///
/// Secrets of eight characters or fewer are fully replaced by `*`. Longer
/// secrets keep their first and last four characters, with up to eight `*`
/// standing in for the hidden middle.
///
/// Lengths are counted in characters, not bytes, so multi-byte text is masked
/// the same way as ASCII and short non-ASCII secrets are never partly revealed.
fn mask_secret(secret: &str) -> String {
    let char_count = secret.chars().count();
    if char_count <= 8 {
        return "*".repeat(char_count);
    }

    let prefix: String = secret.chars().take(4).collect();
    let suffix: String = secret.chars().skip(char_count - 4).collect();
    // Cap the filler so the preview does not disclose the exact secret length.
    let hidden = (char_count - 8).min(8);
    format!("{}{}{}", prefix, "*".repeat(hidden), suffix)
}
366
/// Replaces each byte range of `content` with a `[REDACTED]` marker.
///
/// Ranges may be given in any order and may overlap. Overlapping ranges are
/// merged into a single marker, so a range nested inside an earlier one can
/// never cause part of the covered text to be emitted again. Ranges that
/// merely touch (one ends where the next starts) each get their own marker.
///
/// # Panics
///
/// Panics if a range exceeds `content` or does not fall on UTF-8 character
/// boundaries. Ranges taken from regex matches on `content` always qualify.
fn apply_redactions(content: &str, ranges: &[Range<usize>]) -> String {
    if ranges.is_empty() {
        return content.to_string();
    }

    let mut ordered: Vec<&Range<usize>> = ranges.iter().collect();
    ordered.sort_by_key(|r| r.start);

    let mut result = String::with_capacity(content.len());
    let mut last_end = 0;

    for range in ordered {
        if range.start < last_end {
            // Overlaps the previous redaction: extend it, never shrink it.
            last_end = last_end.max(range.end);
            continue;
        }

        result.push_str(&content[last_end..range.start]);
        result.push_str("[REDACTED]");
        // Guard against an inverted range moving the cursor backwards.
        last_end = range.end.max(range.start);
    }

    if last_end < content.len() {
        result.push_str(&content[last_end..]);
    }

    result
}
390
/// Reports whether `pattern` contains a `|` outside every group and character
/// class, i.e. an alternation that applies to the whole expression.
///
/// The scan is deliberately simple (escapes, `[...]`, `(...)`); where it is
/// unsure it errs towards reporting an alternation, which only disables the
/// prefix optimisation for that pattern.
fn has_top_level_alternation(pattern: &str) -> bool {
    let mut depth = 0usize;
    let mut in_class = false;
    let mut chars = pattern.chars();

    while let Some(ch) = chars.next() {
        match ch {
            // An escaped character is never structural.
            '\\' => {
                chars.next();
            }
            '[' if !in_class => in_class = true,
            ']' if in_class => in_class = false,
            '(' if !in_class => depth += 1,
            ')' if !in_class => depth = depth.saturating_sub(1),
            '|' if !in_class && depth == 0 => return true,
            _ => {}
        }
    }

    false
}

/// Extracts the literal text every match of `pattern` must start with.
///
/// Characters are taken from the start of the regex source up to the first
/// metacharacter or escape. Returns `None` when fewer than three bytes of
/// guaranteed literal text are available.
///
/// Two cases are handled so the prefix is never stronger than the regex:
/// * a literal directly followed by `?`, `*` or `{` may be absent from a
///   match, so it is dropped from the prefix;
/// * a pattern with a top-level alternation (`foo|bar`) has no single
///   mandatory prefix, so `None` is returned.
fn extract_literal_prefix(pattern: &str) -> Option<String> {
    if has_top_level_alternation(pattern) {
        return None;
    }

    let mut prefix = String::new();

    for ch in pattern.chars() {
        match ch {
            // The quantifier makes the preceding literal optional or repeated.
            '*' | '?' | '{' => {
                prefix.pop();
                break;
            }
            // `+` requires at least one occurrence, so the literal stays.
            '[' | '(' | '.' | '+' | '|' | '^' | '$' | '\\' => break,
            _ => prefix.push(ch),
        }
    }

    if prefix.len() >= 3 {
        Some(prefix)
    } else {
        None
    }
}
412
413fn default_patterns() -> Vec<LeakPattern> {
415 vec![
416 LeakPattern {
418 name: "openai_api_key".to_string(),
419 regex: Regex::new(r"sk-(?:proj-)?[a-zA-Z0-9]{20,}(?:T3BlbkFJ[a-zA-Z0-9_-]*)?").unwrap(),
420 severity: LeakSeverity::Critical,
421 action: LeakAction::Block,
422 },
423 LeakPattern {
425 name: "anthropic_api_key".to_string(),
426 regex: Regex::new(r"sk-ant-api[a-zA-Z0-9_-]{90,}").unwrap(),
427 severity: LeakSeverity::Critical,
428 action: LeakAction::Block,
429 },
430 LeakPattern {
432 name: "aws_access_key".to_string(),
433 regex: Regex::new(r"AKIA[0-9A-Z]{16}").unwrap(),
434 severity: LeakSeverity::Critical,
435 action: LeakAction::Block,
436 },
437 LeakPattern {
439 name: "github_token".to_string(),
440 regex: Regex::new(r"gh[pousr]_[A-Za-z0-9_]{36,}").unwrap(),
441 severity: LeakSeverity::Critical,
442 action: LeakAction::Block,
443 },
444 LeakPattern {
446 name: "github_fine_grained_pat".to_string(),
447 regex: Regex::new(r"github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59}").unwrap(),
448 severity: LeakSeverity::Critical,
449 action: LeakAction::Block,
450 },
451 LeakPattern {
453 name: "stripe_api_key".to_string(),
454 regex: Regex::new(r"sk_(?:live|test)_[a-zA-Z0-9]{24,}").unwrap(),
455 severity: LeakSeverity::Critical,
456 action: LeakAction::Block,
457 },
458 LeakPattern {
460 name: "pem_private_key".to_string(),
461 regex: Regex::new(r"-----BEGIN\s+(?:RSA\s+)?PRIVATE\s+KEY-----").unwrap(),
462 severity: LeakSeverity::Critical,
463 action: LeakAction::Block,
464 },
465 LeakPattern {
467 name: "ssh_private_key".to_string(),
468 regex: Regex::new(r"-----BEGIN\s+(?:OPENSSH|EC|DSA)\s+PRIVATE\s+KEY-----").unwrap(),
469 severity: LeakSeverity::Critical,
470 action: LeakAction::Block,
471 },
472 LeakPattern {
474 name: "google_api_key".to_string(),
475 regex: Regex::new(r"AIza[0-9A-Za-z_-]{35}").unwrap(),
476 severity: LeakSeverity::High,
477 action: LeakAction::Block,
478 },
479 LeakPattern {
481 name: "slack_token".to_string(),
482 regex: Regex::new(r"xox[baprs]-[0-9a-zA-Z-]{10,}").unwrap(),
483 severity: LeakSeverity::High,
484 action: LeakAction::Block,
485 },
486 LeakPattern {
488 name: "twilio_api_key".to_string(),
489 regex: Regex::new(r"SK[a-fA-F0-9]{32}").unwrap(),
490 severity: LeakSeverity::High,
491 action: LeakAction::Block,
492 },
493 LeakPattern {
495 name: "sendgrid_api_key".to_string(),
496 regex: Regex::new(r"SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}").unwrap(),
497 severity: LeakSeverity::High,
498 action: LeakAction::Block,
499 },
500 LeakPattern {
502 name: "bearer_token".to_string(),
503 regex: Regex::new(r"Bearer\s+[a-zA-Z0-9_-]{20,}").unwrap(),
504 severity: LeakSeverity::High,
505 action: LeakAction::Redact,
506 },
507 LeakPattern {
509 name: "auth_header".to_string(),
510 regex: Regex::new(r"(?i)authorization:\s*[a-zA-Z]+\s+[a-zA-Z0-9_-]{20,}").unwrap(),
511 severity: LeakSeverity::High,
512 action: LeakAction::Redact,
513 },
514 LeakPattern {
516 name: "high_entropy_hex".to_string(),
517 regex: Regex::new(r"\b[a-fA-F0-9]{64}\b").unwrap(),
518 severity: LeakSeverity::Medium,
519 action: LeakAction::Warn,
520 },
521 ]
522}
523
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `content` with a detector holding the built-in patterns.
    fn scan_default(content: &str) -> LeakScanResult {
        LeakDetector::new().scan(content)
    }

    /// Returns `true` when `result` contains a match for the named pattern.
    fn flagged(result: &LeakScanResult, pattern: &str) -> bool {
        result.matches.iter().any(|m| m.pattern_name == pattern)
    }

    #[test]
    fn test_detect_openai_key() {
        let result = scan_default("API key: sk-proj-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");

        assert!(!result.is_clean());
        assert!(result.should_block);
        assert!(flagged(&result, "openai_api_key"));
    }

    #[test]
    fn test_detect_github_token() {
        let result = scan_default("token: ghp_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");

        assert!(!result.is_clean());
        assert!(flagged(&result, "github_token"));
    }

    #[test]
    fn test_detect_aws_key() {
        let result = scan_default("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE");

        assert!(!result.is_clean());
        assert!(flagged(&result, "aws_access_key"));
    }

    #[test]
    fn test_detect_pem_key() {
        let result = scan_default("-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA...");

        assert!(!result.is_clean());
        assert!(flagged(&result, "pem_private_key"));
    }

    #[test]
    fn test_clean_content() {
        let result = scan_default("Hello world! This is just regular text with no secrets.");

        assert!(result.is_clean());
        assert!(!result.should_block);
    }

    #[test]
    fn test_redact_bearer_token() {
        let result = scan_default(
            "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9_longtokenvalue",
        );

        assert!(!result.is_clean());
        // Redaction only: nothing in this input should trigger a block.
        assert!(!result.should_block);

        let redacted = result.redacted_content.unwrap();
        assert!(redacted.contains("[REDACTED]"));
        assert!(!redacted.contains("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"));
    }

    #[test]
    fn test_scan_and_clean_blocks() {
        let outcome = LeakDetector::new().scan_and_clean("sk-proj-XXXXXXXXXXXXXXXXXXXXXXXX");

        assert!(outcome.is_err());
    }

    #[test]
    fn test_scan_and_clean_passes_clean() {
        let content = "Just regular text";

        let outcome = LeakDetector::new().scan_and_clean(content);

        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap(), content);
    }

    #[test]
    fn test_mask_secret() {
        assert_eq!(mask_secret("short"), "*****");
        assert_eq!(mask_secret("sk-test1234567890abcdef"), "sk-t********cdef");
    }

    #[test]
    fn test_multiple_matches() {
        let result = scan_default(
            "Keys: AKIAIOSFODNN7EXAMPLE and ghp_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
        );

        assert_eq!(result.matches.len(), 2);
    }

    #[test]
    fn test_severity_ordering() {
        assert!(LeakSeverity::Critical > LeakSeverity::High);
        assert!(LeakSeverity::High > LeakSeverity::Medium);
        assert!(LeakSeverity::Medium > LeakSeverity::Low);
    }

    #[test]
    fn test_scan_http_request_clean() {
        let headers = [("Content-Type".to_string(), "application/json".to_string())];

        let outcome = LeakDetector::new().scan_http_request(
            "https://api.example.com/data",
            &headers,
            Some(b"{\"query\": \"hello\"}"),
        );

        assert!(outcome.is_ok());
    }

    #[test]
    fn test_scan_http_request_blocks_secret_in_url() {
        let outcome = LeakDetector::new().scan_http_request(
            "https://evil.com/steal?key=AKIAIOSFODNN7EXAMPLE",
            &[],
            None,
        );

        assert!(outcome.is_err());
    }

    #[test]
    fn test_scan_http_request_blocks_secret_in_header() {
        let headers = [(
            "X-Custom".to_string(),
            "ghp_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX".to_string(),
        )];

        let outcome =
            LeakDetector::new().scan_http_request("https://api.example.com/data", &headers, None);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_scan_http_request_blocks_secret_in_body() {
        let body = b"{\"stolen\": \"sk-proj-XXXXXXXXXXXXXXXXXXXXXXXX\"}";

        let outcome = LeakDetector::new().scan_http_request(
            "https://api.example.com/webhook",
            &[],
            Some(body),
        );

        assert!(outcome.is_err());
    }

    #[test]
    fn test_scan_http_request_blocks_secret_in_binary_body() {
        // Lead with a byte that is not valid UTF-8 so the body is "binary".
        let mut body = vec![0xFF];
        body.extend_from_slice(b"sk-proj-XXXXXXXXXXXXXXXXXXXXXXXX");

        let outcome = LeakDetector::new().scan_http_request(
            "https://api.example.com/exfil",
            &[],
            Some(&body),
        );

        assert!(outcome.is_err(), "binary body should still be scanned");
    }
}