Skip to main content

tirith_core/
redact.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
4/// Built-in redaction patterns: (label, regex).
5static BUILTIN_PATTERNS: Lazy<Vec<(&'static str, Regex)>> = Lazy::new(|| {
6    vec![
7        (
8            "OpenAI API Key",
9            Regex::new(r"sk-[A-Za-z0-9]{20,}").unwrap(),
10        ),
11        ("AWS Access Key", Regex::new(r"AKIA[A-Z0-9]{16}").unwrap()),
12        ("GitHub PAT", Regex::new(r"ghp_[A-Za-z0-9]{36,}").unwrap()),
13        (
14            "GitHub Server Token",
15            Regex::new(r"ghs_[A-Za-z0-9]{36,}").unwrap(),
16        ),
17        (
18            "Anthropic API Key",
19            Regex::new(r"sk-ant-[A-Za-z0-9\-]{20,}").unwrap(),
20        ),
21        (
22            "Slack Token",
23            Regex::new(r"xox[bprs]-[A-Za-z0-9\-]{10,}").unwrap(),
24        ),
25        (
26            "Email Address",
27            Regex::new(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}").unwrap(),
28        ),
29    ]
30});
31
32/// Redact sensitive content from a string using built-in patterns.
33pub fn redact(input: &str) -> String {
34    let mut result = input.to_string();
35    for (label, regex) in BUILTIN_PATTERNS.iter() {
36        result = regex
37            .replace_all(&result, format!("[REDACTED:{label}]"))
38            .into_owned();
39    }
40    result
41}
42
43/// Pre-compiled set of custom DLP patterns.
44pub struct CompiledCustomPatterns {
45    patterns: Vec<Regex>,
46}
47
48impl CompiledCustomPatterns {
49    /// Compile custom DLP patterns once for reuse across multiple redaction calls.
50    pub fn new(raw_patterns: &[String]) -> Self {
51        let patterns = raw_patterns
52            .iter()
53            .filter_map(|pat_str| match Regex::new(pat_str) {
54                Ok(re) => Some(re),
55                Err(e) => {
56                    eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
57                    None
58                }
59            })
60            .collect();
61        Self { patterns }
62    }
63}
64
65/// Redact using both built-in and custom patterns from policy.
66pub fn redact_with_custom(input: &str, custom_patterns: &[String]) -> String {
67    let mut result = redact(input);
68    for pat_str in custom_patterns {
69        if pat_str.len() > 1024 {
70            eprintln!(
71                "tirith: DLP pattern too long ({} chars), skipping",
72                pat_str.len()
73            );
74            continue;
75        }
76        match Regex::new(pat_str) {
77            Ok(re) => {
78                result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
79            }
80            Err(e) => {
81                eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
82            }
83        }
84    }
85    result
86}
87
88/// Redact using built-in patterns and pre-compiled custom patterns (avoids per-call recompilation).
89pub fn redact_with_compiled(input: &str, compiled: &CompiledCustomPatterns) -> String {
90    let mut result = redact(input);
91    for re in &compiled.patterns {
92        result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
93    }
94    result
95}
96
97/// Redact sensitive content from a Finding's string fields in-place.
98pub fn redact_finding(finding: &mut crate::verdict::Finding, custom_patterns: &[String]) {
99    finding.title = redact_with_custom(&finding.title, custom_patterns);
100    finding.description = redact_with_custom(&finding.description, custom_patterns);
101    if let Some(ref mut v) = finding.human_view {
102        *v = redact_with_custom(v, custom_patterns);
103    }
104    if let Some(ref mut v) = finding.agent_view {
105        *v = redact_with_custom(v, custom_patterns);
106    }
107    for ev in &mut finding.evidence {
108        redact_evidence(ev, custom_patterns);
109    }
110}
111
112fn redact_evidence(ev: &mut crate::verdict::Evidence, custom_patterns: &[String]) {
113    use crate::verdict::Evidence;
114    match ev {
115        Evidence::Url { raw } => {
116            *raw = redact_with_custom(raw, custom_patterns);
117        }
118        Evidence::CommandPattern { matched, .. } => {
119            *matched = redact_with_custom(matched, custom_patterns);
120        }
121        Evidence::EnvVar { value_preview, .. } => {
122            *value_preview = redact_with_custom(value_preview, custom_patterns);
123        }
124        Evidence::Text { detail } => {
125            *detail = redact_with_custom(detail, custom_patterns);
126        }
127        Evidence::ByteSequence { description, .. } => {
128            *description = redact_with_custom(description, custom_patterns);
129        }
130        // HostComparison and HomoglyphAnalysis contain domain names / char analysis, not user content
131        _ => {}
132    }
133}
134
135/// Redact all findings in a verdict in-place.
136pub fn redact_verdict(verdict: &mut crate::verdict::Verdict, custom_patterns: &[String]) {
137    for f in &mut verdict.findings {
138        redact_finding(f, custom_patterns);
139    }
140}
141
142/// Redact all findings in a slice in-place.
143pub fn redact_findings(findings: &mut [crate::verdict::Finding], custom_patterns: &[String]) {
144    for f in findings.iter_mut() {
145        redact_finding(f, custom_patterns);
146    }
147}
148
#[cfg(test)]
mod tests {
    //! Unit tests for the redaction helpers. All secrets below are synthetic
    //! strings shaped to satisfy the patterns; none is a real credential.

    use super::*;

    #[test]
    fn test_redact_openai_key() {
        let input = "export OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwxyz12345678";
        let redacted = redact(input);
        assert!(!redacted.contains("sk-abcdef"));
        assert!(redacted.contains("[REDACTED:OpenAI API Key]"));
    }

    #[test]
    fn test_redact_aws_key() {
        let input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        let redacted = redact(input);
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(redacted.contains("[REDACTED:AWS Access Key]"));
    }

    #[test]
    fn test_redact_github_pat() {
        let input = "GITHUB_TOKEN=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl";
        let redacted = redact(input);
        assert!(!redacted.contains("ghp_ABCDEF"));
        assert!(redacted.contains("[REDACTED:GitHub PAT]"));
    }

    #[test]
    fn test_redact_github_server_token() {
        let input = "token=ghs_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl";
        let redacted = redact(input);
        assert!(!redacted.contains("ghs_ABCDEF"));
        assert!(redacted.contains("[REDACTED:GitHub Server Token]"));
    }

    #[test]
    fn test_redact_slack_token() {
        let input = "SLACK_TOKEN=xoxb-1234567890-abcdefghij";
        let redacted = redact(input);
        assert!(!redacted.contains("xoxb-1234567890"));
        assert!(redacted.contains("[REDACTED:Slack Token]"));
    }

    #[test]
    fn test_redact_email() {
        let input = "contact: user@example.com for details";
        let redacted = redact(input);
        assert!(!redacted.contains("user@example.com"));
        assert!(redacted.contains("[REDACTED:Email Address]"));
    }

    #[test]
    fn test_redact_no_false_positive() {
        let input = "normal text without any secrets";
        let redacted = redact(input);
        assert_eq!(input, redacted);
    }

    #[test]
    fn test_redact_multiple_secrets_in_one_string() {
        let input = "sk-abcdefghijklmnopqrstuvwxyz12345678 and AKIAIOSFODNN7EXAMPLE";
        let redacted = redact(input);
        assert!(redacted.contains("[REDACTED:OpenAI API Key]"));
        assert!(redacted.contains("[REDACTED:AWS Access Key]"));
        assert!(!redacted.contains("sk-abcdef"));
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn test_redact_with_custom() {
        let input = "internal ref: PROJ-12345 in the system";
        let custom = vec![r"PROJ-\d+".to_string()];
        let redacted = redact_with_custom(input, &custom);
        assert!(!redacted.contains("PROJ-12345"));
        assert!(redacted.contains("[REDACTED:custom]"));
    }

    #[test]
    fn test_redact_with_custom_skips_invalid_pattern() {
        // The unclosed group is rejected; the valid pattern after it still applies.
        let custom = vec!["(".to_string(), r"PROJ-\d+".to_string()];
        let redacted = redact_with_custom("ref PROJ-1 here", &custom);
        assert_eq!(redacted, "ref [REDACTED:custom] here");
    }

    #[test]
    fn test_redact_with_custom_pattern_length_limit() {
        // 1025 bytes is over the limit: the pattern is skipped, input is unchanged.
        let too_long = "a".repeat(1025);
        assert_eq!(
            redact_with_custom(&too_long, &[too_long.clone()]),
            too_long
        );

        // Exactly 1024 bytes is still accepted and applied.
        let at_limit = "a".repeat(1024);
        assert_eq!(
            redact_with_custom(&at_limit, &[at_limit.clone()]),
            "[REDACTED:custom]"
        );
    }

    #[test]
    fn test_redact_with_compiled() {
        let compiled = CompiledCustomPatterns::new(&[r"PROJ-\d+".to_string()]);
        let redacted = redact_with_compiled("internal ref: PROJ-12345 in the system", &compiled);
        assert!(!redacted.contains("PROJ-12345"));
        assert!(redacted.contains("[REDACTED:custom]"));
    }

    #[test]
    fn test_compiled_patterns_drop_invalid_entries() {
        let compiled = CompiledCustomPatterns::new(&["(".to_string(), r"PROJ-\d+".to_string()]);
        assert_eq!(compiled.patterns.len(), 1);
        assert_eq!(
            redact_with_compiled("ref PROJ-1 here", &compiled),
            "ref [REDACTED:custom] here"
        );
    }

    #[test]
    fn test_redact_anthropic_key() {
        let input = "ANTHROPIC_API_KEY=sk-ant-api03-abcdefghijklmnop";
        let redacted = redact(input);
        assert!(!redacted.contains("sk-ant-api03"));
        assert!(redacted.contains("[REDACTED:Anthropic API Key]"));
    }

    #[test]
    fn test_redact_finding_covers_all_fields() {
        use crate::verdict::{Evidence, Finding, RuleId, Severity};

        let mut finding = Finding {
            rule_id: RuleId::SensitiveEnvExport,
            severity: Severity::High,
            title: "test".into(),
            description: "exports sk-abcdefghijklmnopqrstuvwxyz12345678".into(),
            evidence: vec![
                Evidence::EnvVar {
                    name: "OPENAI_API_KEY".into(),
                    value_preview: "sk-abcdefghijklmnopqrstuvwxyz12345678".into(),
                },
                Evidence::Text {
                    detail: "saw ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl".into(),
                },
                Evidence::CommandPattern {
                    pattern: "export".into(),
                    matched: "export OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwxyz12345678".into(),
                },
                Evidence::Url {
                    raw: "https://example.com/?token=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl"
                        .into(),
                },
            ],
            human_view: Some("key is sk-abcdefghijklmnopqrstuvwxyz12345678".into()),
            agent_view: Some("AKIAIOSFODNN7EXAMPLE exposed".into()),
            mitre_id: None,
            custom_rule_id: None,
        };

        redact_finding(&mut finding, &[]);

        // title has nothing to redact and must come through unchanged
        assert_eq!(finding.title, "test");

        // description redacted
        assert!(finding.description.contains("[REDACTED:OpenAI API Key]"));
        assert!(!finding.description.contains("sk-abcdef"));

        // evidence redacted
        match &finding.evidence[0] {
            Evidence::EnvVar { value_preview, .. } => {
                assert!(value_preview.contains("[REDACTED:OpenAI API Key]"));
            }
            _ => panic!("expected EnvVar"),
        }
        match &finding.evidence[1] {
            Evidence::Text { detail } => {
                assert!(detail.contains("[REDACTED:GitHub PAT]"));
            }
            _ => panic!("expected Text"),
        }
        match &finding.evidence[2] {
            Evidence::CommandPattern { matched, .. } => {
                assert!(matched.contains("[REDACTED:OpenAI API Key]"));
            }
            _ => panic!("expected CommandPattern"),
        }
        match &finding.evidence[3] {
            Evidence::Url { raw } => {
                assert!(raw.contains("[REDACTED:GitHub PAT]"));
                assert!(!raw.contains("ghp_ABCDEF"));
            }
            _ => panic!("expected Url"),
        }

        // human_view / agent_view redacted
        assert!(finding
            .human_view
            .as_ref()
            .unwrap()
            .contains("[REDACTED:OpenAI API Key]"));
        assert!(finding
            .agent_view
            .as_ref()
            .unwrap()
            .contains("[REDACTED:AWS Access Key]"));
    }
}