Skip to main content

tracevault_core/
redact.rs

1use regex::Regex;
2
3pub struct Redactor {
4    patterns: Vec<Regex>,
5    high_entropy_pattern: Regex,
6}
7
8const REDACTED: &str = "[REDACTED]";
9
10impl Redactor {
11    pub fn new() -> Self {
12        let patterns = [
13            // AWS Access Key
14            r"AKIA[0-9A-Z]{16}",
15            // GitHub token
16            r"gh[ps]_[A-Za-z0-9]{36,}",
17            // Generic API key patterns
18            r#"(?i)(api[_-]?key|apikey|secret[_-]?key)\s*[:=]\s*["']?[A-Za-z0-9/+=]{20,}"#,
19            // JWT
20            r"eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+",
21            // RSA private key header
22            r"-----BEGIN (?:RSA )?PRIVATE KEY-----",
23            // Slack token
24            r"xox[bpras]-[0-9A-Za-z\-]+",
25            // Generic bearer token
26            r"(?i)bearer\s+[A-Za-z0-9\-._~+/]+=*",
27        ];
28
29        Self {
30            patterns: patterns.iter().map(|p| Regex::new(p).unwrap()).collect(),
31            high_entropy_pattern: Regex::new(r"[A-Za-z0-9/+_=\-]{16,}").unwrap(),
32        }
33    }
34
35    pub fn redact_string(&self, input: &str) -> String {
36        let mut result = input.to_string();
37
38        // Pattern-based redaction first
39        for pattern in &self.patterns {
40            result = pattern.replace_all(&result, REDACTED).to_string();
41        }
42
43        // Entropy-based redaction
44        let entropy_re = &self.high_entropy_pattern;
45        result = entropy_re
46            .replace_all(&result, |caps: &regex::Captures| {
47                let matched = caps.get(0).unwrap().as_str();
48                if shannon_entropy(matched) > 4.5 {
49                    REDACTED.to_string()
50                } else {
51                    matched.to_string()
52                }
53            })
54            .to_string();
55
56        result
57    }
58}
59
60impl Default for Redactor {
61    fn default() -> Self {
62        Self::new()
63    }
64}
65
/// Computes the Shannon entropy of `s` in bits per byte.
///
/// The string is treated as a sequence of bytes (not Unicode scalar values).
/// Returns `0.0` for an empty string.
fn shannon_entropy(s: &str) -> f64 {
    let bytes = s.as_bytes();
    if bytes.is_empty() {
        return 0.0;
    }

    // Histogram of how often each possible byte value occurs.
    let mut counts = [0u32; 256];
    for &byte in bytes {
        counts[usize::from(byte)] += 1;
    }

    let total = bytes.len() as f64;
    counts
        .iter()
        .copied()
        .filter(|&count| count != 0)
        .map(|count| {
            // Probability of this byte value, and its contribution -p * log2(p).
            let p = f64::from(count) / total;
            -p * p.log2()
        })
        .sum()
}