1use once_cell::sync::Lazy;
2use regex::Regex;
3
4static BUILTIN_PATTERNS: Lazy<Vec<(&'static str, Regex)>> = Lazy::new(|| {
6 vec![
7 (
8 "OpenAI API Key",
9 Regex::new(r"sk-[A-Za-z0-9]{20,}").unwrap(),
10 ),
11 ("AWS Access Key", Regex::new(r"AKIA[A-Z0-9]{16}").unwrap()),
12 ("GitHub PAT", Regex::new(r"ghp_[A-Za-z0-9]{36,}").unwrap()),
13 (
14 "GitHub Server Token",
15 Regex::new(r"ghs_[A-Za-z0-9]{36,}").unwrap(),
16 ),
17 (
18 "Anthropic API Key",
19 Regex::new(r"sk-ant-[A-Za-z0-9\-]{20,}").unwrap(),
20 ),
21 (
22 "Slack Token",
23 Regex::new(r"xox[bprs]-[A-Za-z0-9\-]{10,}").unwrap(),
24 ),
25 (
26 "Email Address",
27 Regex::new(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}").unwrap(),
28 ),
29 ]
30});
31
32pub fn redact(input: &str) -> String {
34 let mut result = input.to_string();
35 for (label, regex) in BUILTIN_PATTERNS.iter() {
36 result = regex
37 .replace_all(&result, format!("[REDACTED:{label}]"))
38 .into_owned();
39 }
40 result
41}
42
43pub struct CompiledCustomPatterns {
45 patterns: Vec<Regex>,
46}
47
48impl CompiledCustomPatterns {
49 pub fn new(raw_patterns: &[String]) -> Self {
51 let patterns = raw_patterns
52 .iter()
53 .filter_map(|pat_str| match Regex::new(pat_str) {
54 Ok(re) => Some(re),
55 Err(e) => {
56 eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
57 None
58 }
59 })
60 .collect();
61 Self { patterns }
62 }
63}
64
65pub fn redact_with_custom(input: &str, custom_patterns: &[String]) -> String {
67 let mut result = redact(input);
68 for pat_str in custom_patterns {
69 if pat_str.len() > 1024 {
70 eprintln!(
71 "tirith: DLP pattern too long ({} chars), skipping",
72 pat_str.len()
73 );
74 continue;
75 }
76 match Regex::new(pat_str) {
77 Ok(re) => {
78 result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
79 }
80 Err(e) => {
81 eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
82 }
83 }
84 }
85 result
86}
87
88pub fn redact_with_compiled(input: &str, compiled: &CompiledCustomPatterns) -> String {
90 let mut result = redact(input);
91 for re in &compiled.patterns {
92 result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
93 }
94 result
95}
96
97pub fn redact_finding(finding: &mut crate::verdict::Finding, custom_patterns: &[String]) {
99 finding.title = redact_with_custom(&finding.title, custom_patterns);
100 finding.description = redact_with_custom(&finding.description, custom_patterns);
101 if let Some(ref mut v) = finding.human_view {
102 *v = redact_with_custom(v, custom_patterns);
103 }
104 if let Some(ref mut v) = finding.agent_view {
105 *v = redact_with_custom(v, custom_patterns);
106 }
107 for ev in &mut finding.evidence {
108 redact_evidence(ev, custom_patterns);
109 }
110}
111
112fn redact_evidence(ev: &mut crate::verdict::Evidence, custom_patterns: &[String]) {
113 use crate::verdict::Evidence;
114 match ev {
115 Evidence::Url { raw } => {
116 *raw = redact_with_custom(raw, custom_patterns);
117 }
118 Evidence::CommandPattern { matched, .. } => {
119 *matched = redact_with_custom(matched, custom_patterns);
120 }
121 Evidence::EnvVar { value_preview, .. } => {
122 *value_preview = redact_with_custom(value_preview, custom_patterns);
123 }
124 Evidence::Text { detail } => {
125 *detail = redact_with_custom(detail, custom_patterns);
126 }
127 Evidence::ByteSequence { description, .. } => {
128 *description = redact_with_custom(description, custom_patterns);
129 }
130 _ => {}
132 }
133}
134
135pub fn redact_verdict(verdict: &mut crate::verdict::Verdict, custom_patterns: &[String]) {
137 for f in &mut verdict.findings {
138 redact_finding(f, custom_patterns);
139 }
140}
141
142pub fn redact_findings(findings: &mut [crate::verdict::Finding], custom_patterns: &[String]) {
144 for f in findings.iter_mut() {
145 redact_finding(f, custom_patterns);
146 }
147}
148
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_redact_openai_key() {
        let input = "export OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwxyz12345678";
        let redacted = redact(input);
        assert!(!redacted.contains("sk-abcdef"));
        assert!(redacted.contains("[REDACTED:OpenAI API Key]"));
    }

    #[test]
    fn test_redact_aws_key() {
        let input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        let redacted = redact(input);
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(redacted.contains("[REDACTED:AWS Access Key]"));
    }

    #[test]
    fn test_redact_github_pat() {
        let input = "GITHUB_TOKEN=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl";
        let redacted = redact(input);
        assert!(!redacted.contains("ghp_ABCDEF"));
        assert!(redacted.contains("[REDACTED:GitHub PAT]"));
    }

    #[test]
    fn test_redact_github_server_token() {
        let input = "token: ghs_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl";
        let redacted = redact(input);
        assert!(!redacted.contains("ghs_ABCDEF"));
        assert!(redacted.contains("[REDACTED:GitHub Server Token]"));
    }

    #[test]
    fn test_redact_slack_token() {
        let input = "SLACK_TOKEN=xoxb-123456789012-abcdefghijkl";
        let redacted = redact(input);
        assert!(!redacted.contains("xoxb-1234"));
        assert!(redacted.contains("[REDACTED:Slack Token]"));
    }

    #[test]
    fn test_redact_email() {
        let input = "contact: user@example.com for details";
        let redacted = redact(input);
        assert!(!redacted.contains("user@example.com"));
        assert!(redacted.contains("[REDACTED:Email Address]"));
    }

    #[test]
    fn test_redact_no_false_positive() {
        let input = "normal text without any secrets";
        let redacted = redact(input);
        assert_eq!(input, redacted);
    }

    #[test]
    fn test_redact_multiple_secrets() {
        // Several different secrets in one string must each get their own label.
        let input = "sk-abcdefghijklmnopqrstuvwxyz12345678 and AKIAIOSFODNN7EXAMPLE";
        let redacted = redact(input);
        assert_eq!(
            redacted,
            "[REDACTED:OpenAI API Key] and [REDACTED:AWS Access Key]"
        );
    }

    #[test]
    fn test_redact_with_custom() {
        let input = "internal ref: PROJ-12345 in the system";
        let custom = vec![r"PROJ-\d+".to_string()];
        let redacted = redact_with_custom(input, &custom);
        assert!(!redacted.contains("PROJ-12345"));
        assert!(redacted.contains("[REDACTED:custom]"));
    }

    #[test]
    fn test_redact_with_compiled() {
        let compiled = CompiledCustomPatterns::new(&[r"PROJ-\d+".to_string()]);
        let redacted = redact_with_compiled("internal ref: PROJ-12345 in the system", &compiled);
        assert_eq!(redacted, "internal ref: [REDACTED:custom] in the system");
    }

    #[test]
    fn test_compiled_and_uncompiled_paths_agree() {
        let custom = vec![r"PROJ-\d+".to_string(), r"ticket#\d+".to_string()];
        let compiled = CompiledCustomPatterns::new(&custom);
        let input = "PROJ-1 ticket#42 user@example.com AKIAIOSFODNN7EXAMPLE";
        assert_eq!(
            redact_with_custom(input, &custom),
            redact_with_compiled(input, &compiled)
        );
    }

    #[test]
    fn test_invalid_custom_pattern_is_skipped() {
        // An uncompilable pattern must not stop the remaining patterns from applying.
        let custom = vec!["(".to_string(), r"PROJ-\d+".to_string()];
        let input = "internal ref: PROJ-12345 in the system";
        let expected = "internal ref: [REDACTED:custom] in the system";

        assert_eq!(redact_with_custom(input, &custom), expected);

        let compiled = CompiledCustomPatterns::new(&custom);
        assert_eq!(redact_with_compiled(input, &compiled), expected);
    }

    #[test]
    fn test_oversized_custom_pattern_is_skipped() {
        // Patterns over the 1024-byte limit are ignored by `redact_with_custom`,
        // so text they would have matched is left untouched.
        let oversized = "a".repeat(1025);
        let input = format!("prefix {oversized} suffix");
        let custom = vec![oversized];
        assert_eq!(redact_with_custom(&input, &custom), input);
    }

    #[test]
    fn test_redact_anthropic_key() {
        let input = "ANTHROPIC_API_KEY=sk-ant-api03-abcdefghijklmnop";
        let redacted = redact(input);
        assert!(!redacted.contains("sk-ant-api03"));
        assert!(redacted.contains("[REDACTED:Anthropic API Key]"));
    }

    #[test]
    fn test_redact_finding_covers_all_fields() {
        use crate::verdict::{Evidence, Finding, RuleId, Severity};

        let mut finding = Finding {
            rule_id: RuleId::SensitiveEnvExport,
            severity: Severity::High,
            title: "test".into(),
            description: "exports sk-abcdefghijklmnopqrstuvwxyz12345678".into(),
            evidence: vec![
                Evidence::EnvVar {
                    name: "OPENAI_API_KEY".into(),
                    value_preview: "sk-abcdefghijklmnopqrstuvwxyz12345678".into(),
                },
                Evidence::Text {
                    detail: "saw ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl".into(),
                },
                Evidence::CommandPattern {
                    pattern: "export".into(),
                    matched: "export OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwxyz12345678".into(),
                },
            ],
            human_view: Some("key is sk-abcdefghijklmnopqrstuvwxyz12345678".into()),
            agent_view: Some("AKIAIOSFODNN7EXAMPLE exposed".into()),
            mitre_id: None,
            custom_rule_id: None,
        };

        redact_finding(&mut finding, &[]);

        assert!(finding.description.contains("[REDACTED:OpenAI API Key]"));
        assert!(!finding.description.contains("sk-abcdef"));

        match &finding.evidence[0] {
            Evidence::EnvVar { value_preview, .. } => {
                assert!(value_preview.contains("[REDACTED:OpenAI API Key]"));
            }
            _ => panic!("expected EnvVar"),
        }
        match &finding.evidence[1] {
            Evidence::Text { detail } => {
                assert!(detail.contains("[REDACTED:GitHub PAT]"));
            }
            _ => panic!("expected Text"),
        }
        match &finding.evidence[2] {
            Evidence::CommandPattern { matched, .. } => {
                assert!(matched.contains("[REDACTED:OpenAI API Key]"));
            }
            _ => panic!("expected CommandPattern"),
        }

        assert!(finding
            .human_view
            .as_ref()
            .unwrap()
            .contains("[REDACTED:OpenAI API Key]"));
        assert!(finding
            .agent_view
            .as_ref()
            .unwrap()
            .contains("[REDACTED:AWS Access Key]"));
    }

    #[test]
    fn test_redact_findings_applies_builtin_and_custom_to_url_evidence() {
        use crate::verdict::{Evidence, Finding, RuleId, Severity};

        let mut findings = vec![Finding {
            rule_id: RuleId::SensitiveEnvExport,
            severity: Severity::High,
            title: "leak of AKIAIOSFODNN7EXAMPLE".into(),
            description: "none".into(),
            evidence: vec![Evidence::Url {
                raw: "https://example.com/?token=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl"
                    .into(),
            }],
            human_view: None,
            agent_view: None,
            mitre_id: None,
            custom_rule_id: None,
        }];

        redact_findings(&mut findings, &[r"example\.com".to_string()]);

        assert_eq!(findings[0].title, "leak of [REDACTED:AWS Access Key]");
        assert_eq!(findings[0].description, "none");
        assert!(findings[0].human_view.is_none());
        assert!(findings[0].agent_view.is_none());

        match &findings[0].evidence[0] {
            Evidence::Url { raw } => {
                assert_eq!(
                    raw.as_str(),
                    "https://[REDACTED:custom]/?token=[REDACTED:GitHub PAT]"
                );
            }
            _ => panic!("expected Url"),
        }
    }
}