Skip to main content

codex_recall/
redact.rs

1use regex::Regex;
2use std::sync::OnceLock;
3
/// Placeholder text substituted for every secret value that a redaction pattern matches.
const REDACTED: &str = "[REDACTED]";
5
6pub fn redact_secrets(text: &str) -> String {
7    if !has_secret_indicator(text) {
8        return text.to_owned();
9    }
10
11    let text = env_assignment_regex()
12        .replace_all(text, |captures: &regex::Captures<'_>| {
13            format!("{}={}", &captures[1], REDACTED)
14        })
15        .into_owned();
16    let text = bearer_regex()
17        .replace_all(&text, |captures: &regex::Captures<'_>| {
18            format!("{} {}", &captures[1], REDACTED)
19        })
20        .into_owned();
21    let text = key_value_regex()
22        .replace_all(&text, |captures: &regex::Captures<'_>| {
23            format!("{}{}", &captures[1], REDACTED)
24        })
25        .into_owned();
26    let text = private_key_block_regex()
27        .replace_all(&text, REDACTED)
28        .into_owned();
29    token_regex().replace_all(&text, REDACTED).into_owned()
30}
31
/// Cheap pre-filter: reports whether `text` contains any substring that one of the
/// redaction patterns could match, so that secret-free text can skip the regex passes.
///
/// Matching is ASCII case-insensitive because the assignment, key/value, Bearer and
/// private-key patterns are all compiled with `(?i)`; a case-sensitive filter would let
/// inputs such as `Password: ...`, `AUTHORIZATION=...` or `db_pass=...` bypass redaction
/// entirely. The key spellings without a separator (`apikey`, `accesskey`, `privatekey`)
/// and with a hyphen are listed as well, since the key/value pattern accepts them.
///
/// The token-prefix pattern is case-sensitive, so folding case here only makes the
/// filter more permissive for it. A false positive merely costs the regex passes,
/// whereas a false negative would leave a secret in the output.
fn has_secret_indicator(text: &str) -> bool {
    // All needles are lowercase; `pass` also covers `password`, and `private key`
    // covers the `BEGIN ... PRIVATE KEY` marker of PEM blocks.
    const NEEDLES: &[&str] = &[
        "token",
        "secret",
        "pass",
        "api_key",
        "api-key",
        "apikey",
        "access_key",
        "access-key",
        "accesskey",
        "private_key",
        "private-key",
        "privatekey",
        "private key",
        "dsn",
        "cookie",
        "authorization",
        "bearer",
        "sk-",
        "github_pat_",
        "ghp_",
        "gho_",
        "ghu_",
        "ghs_",
        "ghr_",
        "xox",
    ];

    let haystack = text.to_ascii_lowercase();
    NEEDLES.iter().any(|needle| haystack.contains(needle))
}
71
72fn env_assignment_regex() -> &'static Regex {
73    static REGEX: OnceLock<Regex> = OnceLock::new();
74    REGEX.get_or_init(|| {
75        Regex::new(
76            r#"(?i)\b([A-Z0-9_]*(?:TOKEN|SECRET|PASSWORD|PASS|API_KEY|ACCESS_KEY|PRIVATE_KEY|DSN|COOKIE|AUTHORIZATION)[A-Z0-9_]*)\s*=\s*([^\s"']+)"#,
77        )
78        .expect("env assignment redaction regex compiles")
79    })
80}
81
82fn key_value_regex() -> &'static Regex {
83    static REGEX: OnceLock<Regex> = OnceLock::new();
84    REGEX.get_or_init(|| {
85        Regex::new(
86            r#"(?i)(["']?(?:token|secret|password|api[_-]?key|access[_-]?key|private[_-]?key|dsn|cookie)["']?\s*[:=]\s*)(?:"[^"]*"|'[^']*'|[^\s,}]+)"#,
87        )
88        .expect("key-value redaction regex compiles")
89    })
90}
91
92fn bearer_regex() -> &'static Regex {
93    static REGEX: OnceLock<Regex> = OnceLock::new();
94    REGEX.get_or_init(|| {
95        Regex::new(r#"(?i)\b(Bearer)\s+[A-Za-z0-9._~+/=-]{12,}"#)
96            .expect("bearer redaction regex compiles")
97    })
98}
99
100fn token_regex() -> &'static Regex {
101    static REGEX: OnceLock<Regex> = OnceLock::new();
102    REGEX.get_or_init(|| {
103        Regex::new(
104            r#"\b(?:sk-[A-Za-z0-9_-]{16,}|github_pat_[A-Za-z0-9_]{20,}|gh[pousr]_[A-Za-z0-9_]{20,}|xox[baprs]-[A-Za-z0-9-]{16,})\b"#,
105        )
106        .expect("token redaction regex compiles")
107    })
108}
109
110fn private_key_block_regex() -> &'static Regex {
111    static REGEX: OnceLock<Regex> = OnceLock::new();
112    REGEX.get_or_init(|| {
113        Regex::new(r#"(?is)-----BEGIN [A-Z ]*PRIVATE KEY-----.*?-----END [A-Z ]*PRIVATE KEY-----"#)
114            .expect("private key block redaction regex compiles")
115    })
116}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// Glues string fragments together at runtime.
    ///
    /// NOTE(review): presumably used so that no complete token-shaped literal appears
    /// in this source file (e.g. to keep secret scanners quiet) — confirm.
    fn joined(parts: &[&str]) -> String {
        parts.iter().copied().collect()
    }

    /// Credential-named assignments are redacted while an ordinary sentence that merely
    /// mentions the word "secret" is left untouched.
    #[test]
    fn redacts_credential_assignments_without_redacting_plain_language() {
        let input =
            "The webhook secret was missing. API_KEY=abc123456789 SENTRY_DSN=https://public@dsn";

        let output = redact_secrets(input);

        let must_remain = [
            "The webhook secret was missing.",
            "API_KEY=[REDACTED]",
            "SENTRY_DSN=[REDACTED]",
        ];
        for fragment in must_remain {
            assert!(output.contains(fragment));
        }

        let must_vanish = ["abc123456789", "public@dsn"];
        for fragment in must_vanish {
            assert!(!output.contains(fragment));
        }
    }

    /// A JSON payload carrying a Bearer header, a keyed token and a prefixed token has
    /// all three values replaced.
    #[test]
    fn redacts_auth_headers_and_common_token_prefixes() {
        let github_pat = joined(&["github", "_pat_", "1234567890abcdefghijklmnop"]);
        let openai_key = joined(&["sk", "-", "abcdefghijklmnopqrstuvwxyz"]);
        let payload = format!(
            r#"{{"authorization":"Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9","token":"{github_pat}","openai":"{openai_key}"}}"#
        );

        let output = redact_secrets(&payload);

        let must_remain = [
            r#""authorization":"Bearer [REDACTED]""#,
            r#""token":[REDACTED]"#,
            r#""openai":"[REDACTED]""#,
        ];
        for fragment in must_remain {
            assert!(output.contains(fragment));
        }

        let must_vanish = ["eyJhbGci", github_pat.as_str(), openai_key.as_str()];
        for fragment in must_vanish {
            assert!(!output.contains(fragment));
        }
    }

    /// Every fixture must come back containing the placeholder and without its secret.
    #[test]
    fn redacts_fixture_corpus_without_leaking_secret_values() {
        let openai_key = joined(&["sk", "-proj-", "1234567890abcdefghijklmnop"]);
        let stripe_webhook_secret = joined(&["whsec", "_", "1234567890abcdefghijklmnop"]);

        // Each entry pairs an input with the secret substring that must not survive.
        let fixtures: Vec<(String, String)> = vec![
            (format!("OPENAI_API_KEY = \"{openai_key}\""), openai_key),
            (
                format!("STRIPE_WEBHOOK_SECRET='{stripe_webhook_secret}'"),
                stripe_webhook_secret,
            ),
            (
                String::from("Authorization: Bearer abcdefghijklmnopqrstuvwxyz.1234567890"),
                String::from("abcdefghijklmnopqrstuvwxyz.1234567890"),
            ),
            (
                String::from(r#"{"cookie":"sessionid=abc123456789; path=/"}"#),
                String::from("sessionid=abc123456789"),
            ),
            (
                String::from("password : \"correct horse battery staple\""),
                String::from("correct horse battery staple"),
            ),
            (
                String::from(
                    "-----BEGIN PRIVATE KEY-----\nabc123secret\n-----END PRIVATE KEY-----",
                ),
                String::from("abc123secret"),
            ),
        ];

        for (input, leaked_value) in fixtures {
            let redacted = redact_secrets(&input);
            let was_redacted = redacted.contains(REDACTED);
            let leaked = redacted.contains(&leaked_value);

            assert!(was_redacted, "fixture was not redacted: {input}");
            assert!(!leaked, "fixture leaked {leaked_value}: {redacted}");
        }
    }
}