Skip to main content

agent_code_lib/services/
secret_masker.rs

1//! Secret masking for persistence boundaries.
2//!
3//! Redacts sensitive patterns (API keys, passwords, private keys)
4//! from any string before it crosses a persistence boundary:
5//! disk writes, LLM summarization prompts, transcript exports.
6//!
7//! This is defence-in-depth. Tool results, user messages, and file
8//! contents can contain secrets that should never be written to
9//! session files, compression summaries, or output-store dumps.
10//!
11//! Matched content is replaced with `[REDACTED:<kind>]`.
12
13use regex::Regex;
14use std::sync::LazyLock;
15
16/// Ordered list of (kind, pattern). Order matters: specific providers
17/// match before the generic key/value rule so the kind label is useful.
18struct Pattern {
19    kind: &'static str,
20    re: Regex,
21}
22
23static PATTERNS: LazyLock<Vec<Pattern>> = LazyLock::new(|| {
24    let specs: &[(&str, &str)] = &[
25        // PEM private keys — multiline, match whole block.
26        (
27            "private_key",
28            r"(?s)-----BEGIN (?:RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY-----.*?-----END (?:RSA |EC |DSA |OPENSSH |PGP )?PRIVATE KEY-----",
29        ),
30        // AWS access key ID.
31        ("aws_access_key", r"AKIA[0-9A-Z]{16}"),
32        // GitHub personal access token.
33        ("github_pat", r"ghp_[a-zA-Z0-9]{36}"),
34        // GitHub fine-grained token.
35        ("github_token", r"github_pat_[a-zA-Z0-9_]{50,}"),
36        // Provider API keys (OpenAI, Anthropic etc.) — long opaque prefix.
37        ("provider_api_key", r"sk-[a-zA-Z0-9_\-]{20,}"),
38        // URL-embedded credentials: postgres://user:password@host, redis://:pw@host, etc.
39        // The password char class excludes `@`, whitespace, `"`, `'`, and `\`
40        // so the match stops at the URL boundary and never crosses a
41        // surrounding string delimiter.
42        (
43            "url_credential",
44            r#"(?i)\b(postgres(?:ql)?|mysql|mariadb|redis|rediss|mongodb(?:\+srv)?|amqp|amqps|mqtt|mqtts|smtp|smtps|sftp|ssh|ldap|ldaps|https?)://([a-zA-Z0-9._-]*):([^@\s"'\\]{3,})@"#,
45        ),
46        // Generic assignment, quoted form: api_key = "sekrit", password: 'hunter2hunter2'.
47        // Requires matching open/close quotes so this pattern can never
48        // consume a stray delimiter (e.g. a JSON string's closing quote).
49        // An optional leading backslash on each quote matches JSON-escaped
50        // forms like `api_key = \"sekrit\"` that appear when a string-
51        // valued message is serialized into an outer JSON document.
52        (
53            "credential",
54            r#"(?i)\b((?:[a-z][a-z0-9_-]*[_-])?(?:api[_-]?key|secret|password|passwd|token|auth)[a-z0-9_-]*)\s*[:=]\s*(?:\\?"[A-Za-z0-9_\-./+=]{8,}\\?"|\\?'[A-Za-z0-9_\-./+=]{8,}\\?')"#,
55        ),
56        // Generic assignment, unquoted form: api_key=sekrit, auth_token = sekrit.
57        // The value char class excludes `"` and `'`, so matching stops at
58        // any surrounding JSON/TOML/YAML string delimiter rather than
59        // eating it.
60        (
61            "credential",
62            r#"(?i)\b((?:[a-z][a-z0-9_-]*[_-])?(?:api[_-]?key|secret|password|passwd|token|auth)[a-z0-9_-]*)\s*[:=]\s*[A-Za-z0-9_\-./+=]{8,}"#,
63        ),
64    ];
65
66    specs
67        .iter()
68        .map(|(kind, pat)| Pattern {
69            kind,
70            re: Regex::new(pat).expect("secret_masker pattern must compile"),
71        })
72        .collect()
73});
74
75/// Mask all known secret patterns in `input`.
76///
77/// Idempotent: running twice produces the same output (the replacement
78/// `[REDACTED:<kind>]` contains no characters that match any pattern).
79pub fn mask(input: &str) -> String {
80    let mut out = input.to_string();
81    for p in PATTERNS.iter() {
82        match p.kind {
83            // Keep the identifier visible but scrub the value.
84            "credential" => {
85                out =
86                    p.re.replace_all(&out, "${1}=[REDACTED:credential]")
87                        .into_owned();
88            }
89            // Preserve scheme and username so the URL shape survives
90            // for debugging; only the password segment is masked.
91            "url_credential" => {
92                out =
93                    p.re.replace_all(&out, "${1}://${2}:[REDACTED:url_credential]@")
94                        .into_owned();
95            }
96            _ => {
97                let replacement = format!("[REDACTED:{}]", p.kind);
98                out = p.re.replace_all(&out, replacement.as_str()).into_owned();
99            }
100        }
101    }
102    out
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Fails the test with `why` unless `text` parses as a JSON document.
    fn assert_valid_json(text: &str, why: &str) {
        serde_json::from_str::<serde_json::Value>(text).expect(why);
    }

    #[test]
    fn masks_aws_access_key() {
        let input = "aws key is AKIAIOSFODNN7EXAMPLE in the config";
        let out = mask(input);
        assert!(out.contains("[REDACTED:aws_access_key]"));
        assert!(!out.contains("AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn masks_github_pat() {
        let input = "token=ghp_abcdefghijklmnopqrstuvwxyz0123456789";
        let out = mask(input);
        assert!(!out.contains("ghp_abcdefghijklmnopqrstuvwxyz0123456789"));
        // The generic credential rule runs after the github_pat rule —
        // either label is acceptable so long as the secret is gone.
        assert!(out.contains("REDACTED"));
    }

    #[test]
    fn masks_openai_style_key() {
        let input = "OPENAI_API_KEY=sk-proj-abcdefghijklmnopqrstuvwxyz1234567890";
        let out = mask(input);
        assert!(!out.contains("sk-proj-abcdefghijklmnopqrstuvwxyz1234567890"));
        assert!(out.contains("REDACTED"));
    }

    #[test]
    fn masks_pem_private_key() {
        let input = "config:\n-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA\ngarbage\n-----END RSA PRIVATE KEY-----\nend";
        let out = mask(input);
        assert!(out.contains("[REDACTED:private_key]"));
        assert!(!out.contains("MIIEpAIBAAKCAQEA"));
        // Text on either side of the key block must be left alone.
        assert!(out.starts_with("config:"));
        assert!(out.trim_end().ends_with("end"));
    }

    #[test]
    fn masks_generic_credential_assignment() {
        // NOTE: this only checks that the marker appears. With the current
        // unquoted rule the value match stops at `@`, so the `@lue_12345`
        // tail of the second case is left in the output.
        let cases = [
            "api_key = \"abcd1234efgh5678\"",
            "API-KEY: s3cret_v@lue_12345",
            "password=hunter2hunter2",
            "auth_token = longlonglonglonglonglonglongvalue",
        ];
        for c in cases {
            let out = mask(c);
            assert!(
                out.contains("[REDACTED:credential]"),
                "failed to mask: {c} -> {out}",
            );
        }
    }

    #[test]
    fn preserves_non_secret_text() {
        let input = "the quick brown fox jumps over the lazy dog 42 times";
        let out = mask(input);
        assert_eq!(out, input);
    }

    #[test]
    fn does_not_mask_short_values() {
        // The generic credential rule requires 8+ value characters to avoid
        // masking common identifiers. `id = abcd` has no credential keyword
        // at all; `password = abcd` does, so it is the case that actually
        // exercises the length threshold.
        for input in ["id = abcd", "password = abcd"] {
            let out = mask(input);
            assert_eq!(out, input, "short value was masked: {input}");
        }
    }

    #[test]
    fn idempotent() {
        let input = "key=sk-abcdefghijklmnopqrstuvwxyz and AKIAIOSFODNN7EXAMPLE";
        let once = mask(input);
        let twice = mask(&once);
        assert_eq!(once, twice);
    }

    #[test]
    fn masks_multiple_secrets_in_one_string() {
        let input = "aws=AKIAIOSFODNN7EXAMPLE gh=ghp_abcdefghijklmnopqrstuvwxyz0123456789";
        let out = mask(input);
        assert!(!out.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(!out.contains("ghp_abcdefghijklmnopqrstuvwxyz0123456789"));
    }

    #[test]
    fn masks_single_quoted_credential() {
        let input = "api_key = 'hunter2hunter2'";
        let out = mask(input);
        assert!(!out.contains("hunter2hunter2"));
        assert!(out.contains("[REDACTED:credential]"));
    }

    #[test]
    fn masks_mixed_quoted_and_unquoted_in_one_input() {
        // Quoted alternative runs first; unquoted second. Both must fire.
        let input = r#"password = "firstsecretvalue1" and token=secondsecretvalue2"#;
        let out = mask(input);
        assert!(!out.contains("firstsecretvalue1"));
        assert!(!out.contains("secondsecretvalue2"));
        // Two distinct credential matches.
        assert_eq!(out.matches("[REDACTED:credential]").count(), 2);
    }

    #[test]
    fn does_not_consume_surrounding_json_quote() {
        // Regression test for the JSON corruption fix. An unquoted inner
        // secret inside a JSON string literal must not eat the closing
        // JSON quote — the result must remain structurally valid.
        let input = r#"{"text": "api_key=hunter2hunter2hunter2"}"#;
        let out = mask(input);
        assert!(out.contains("[REDACTED:credential]"));
        // The closing `"` before `}` must survive.
        assert!(out.ends_with(r#""}"#));
        // And the whole thing must still parse as JSON.
        assert_valid_json(&out, "masked JSON fragment must still parse");
    }

    #[test]
    fn does_not_mask_json_key_form() {
        // In `"api_key": "value"` the JSON `"` sits between the key and the
        // `:`, so the credential rule — which expects the separator right
        // after the identifier — has nothing to anchor on.
        let input = r#"{"api_key": "hunter2hunter2"}"#;
        let out = mask(input);
        // Whether the value ends up masked is a secondary question; the
        // invariant asserted here is only that the structure is not
        // corrupted, i.e. the result is still valid JSON.
        assert_valid_json(&out, "masked JSON key form must still parse");
    }

    #[test]
    fn empty_input_does_not_panic() {
        assert_eq!(mask(""), "");
    }

    #[test]
    fn strengthened_idempotency_across_split_pattern() {
        // Run the masker multiple times on a cocktail of secret shapes
        // and confirm the result converges after the first pass.
        let input = concat!(
            "AKIAIOSFODNN7EXAMPLE ",
            "ghp_abcdefghijklmnopqrstuvwxyz0123456789 ",
            "sk-proj-abcdefghijklmnopqrstuvwxyz12345 ",
            r#"password = "firstsecretvalue1" "#,
            "token=secondsecretvalue2 ",
            "auth_token: thirdsecretvalue3",
        );
        let once = mask(input);
        let twice = mask(&once);
        let thrice = mask(&twice);
        assert_eq!(once, twice, "mask is not idempotent");
        assert_eq!(twice, thrice);
        // Confirm each shape hit something.
        assert!(!once.contains("AKIAIOSFODNN7EXAMPLE"));
        assert!(!once.contains("ghp_abcdefghijklmnopqrstuvwxyz0123456789"));
        assert!(!once.contains("firstsecretvalue1"));
        assert!(!once.contains("secondsecretvalue2"));
        assert!(!once.contains("thirdsecretvalue3"));
    }

    #[test]
    fn masks_url_embedded_password_postgres() {
        let input = "postgres://admin:hunter2hunter2@db.internal:5432/prod";
        let out = mask(input);
        assert!(!out.contains("hunter2hunter2"));
        assert!(out.contains("[REDACTED:url_credential]"));
        // Scheme and username must survive for debugging.
        assert!(out.starts_with("postgres://admin:"));
        assert!(out.contains("@db.internal:5432/prod"));
    }

    #[test]
    fn masks_url_embedded_password_redis_without_user() {
        let input = "redis://:supersecretvalue@cache.local:6379";
        let out = mask(input);
        assert!(!out.contains("supersecretvalue"));
        assert!(out.contains("[REDACTED:url_credential]"));
    }

    #[test]
    fn masks_url_embedded_password_inside_json_escape() {
        // A connection URL carried inside a JSON string value. Masking the
        // password must not disturb the JSON string's closing quote or the
        // closing brace that follow the URL.
        let input = r#"{"text": "DATABASE_URL=postgres://user:hunter2hunter2@host/db"}"#;
        let out = mask(input);
        assert!(!out.contains("hunter2hunter2"));
        // Result must remain valid JSON (closing `"` and `}` intact).
        assert_valid_json(&out, "masked URL-credential JSON must still parse");
    }

    #[test]
    fn does_not_mask_url_without_password() {
        let input = "visit https://example.com/path for docs";
        let out = mask(input);
        assert_eq!(out, input);
    }

    #[test]
    fn masks_uppercase_env_var_style() {
        let input = "API_KEY=verylongprovidersecretvalue123";
        let out = mask(input);
        assert!(!out.contains("verylongprovidersecretvalue123"));
        assert!(out.contains("REDACTED"));
    }

    #[test]
    fn allows_legitimate_code_through() {
        let code = r#"
            fn add(a: i32, b: i32) -> i32 { a + b }
            let x = vec![1, 2, 3];
            println!("{}", x.len());
        "#;
        let out = mask(code);
        assert_eq!(out, code);
    }
}