Skip to main content

vtcode_commons/
sanitizer.rs

1//! Secret sanitization utilities for redacting sensitive information.
2//!
3//! Provides regex-based secret redaction for:
4//! - OpenAI API keys (`sk-...`)
5//! - AWS Access Key IDs (`AKIA...`)
6//! - Bearer tokens (`Bearer ...`)
7//! - Generic secret assignments (`api_key=...`, `password:...`, etc.)
8//!
9//! Use this module to sanitize text before logging, displaying in UI,
10//! or storing in session archives.
11
12use regex::Regex;
13use std::sync::LazyLock;
14
15/// OpenAI API key pattern: sk- followed by alphanumeric characters
16static OPENAI_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| compile_regex(r"sk-[A-Za-z0-9]{20,}"));
17
18/// AWS Access Key ID pattern: AKIA followed by 16 alphanumeric characters
19static AWS_ACCESS_KEY_ID_REGEX: LazyLock<Regex> =
20    LazyLock::new(|| compile_regex(r"\bAKIA[0-9A-Z]{16}\b"));
21
22/// Bearer token pattern: "Bearer " followed by token characters
23static BEARER_TOKEN_REGEX: LazyLock<Regex> =
24    LazyLock::new(|| compile_regex(r"(?i)\bBearer\s+[A-Za-z0-9.\-_]{16,}\b"));
25
26/// Generic secret assignment pattern: key=value or key: value format
27/// Matches common secret key names like api_key, token, secret, password
28static SECRET_ASSIGNMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    compile_regex(r#"(?i)\b(api[\-_]?key|token|secret|password)\b(\s*[:=]\s*)(["']?)[^\s"']{8,}"#)
30});
31
32/// Redact secrets and sensitive keys from a string.
33///
34/// This is a best-effort operation using well-known regex patterns.
35/// Redacted values are replaced with `[REDACTED_SECRET]`.
36///
37/// # Examples
38///
39/// ```
40/// use vtcode_commons::sanitizer::redact_secrets;
41///
42/// let input = "API key is sk-abc123xyz789".to_string();
43/// let output = redact_secrets(input);
44/// assert_eq!(output, "API key is [REDACTED_SECRET]");
45/// ```
46pub fn redact_secrets(input: String) -> String {
47    let redacted = OPENAI_KEY_REGEX.replace_all(&input, "[REDACTED_SECRET]");
48    let redacted = AWS_ACCESS_KEY_ID_REGEX.replace_all(&redacted, "[REDACTED_SECRET]");
49    let redacted = BEARER_TOKEN_REGEX.replace_all(&redacted, "Bearer [REDACTED_SECRET]");
50    let redacted = SECRET_ASSIGNMENT_REGEX.replace_all(&redacted, "$1$2$3[REDACTED_SECRET]");
51
52    redacted.to_string()
53}
54
55#[allow(clippy::panic)]
56fn compile_regex(pattern: &str) -> Regex {
57    match Regex::new(pattern) {
58        Ok(regex) => regex,
59        // Panic is acceptable thanks to the `load_regex` test
60        Err(err) => panic!("invalid regex pattern `{pattern}`: {err}"),
61    }
62}
63
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `redact_secrets` on a borrowed string so each test can pass a literal directly.
    fn scrub(text: &str) -> String {
        redact_secrets(text.to_string())
    }

    #[test]
    fn load_regex() {
        // Running the redactor once touches every lazily-compiled pattern, so an invalid
        // pattern would panic here.
        let _ = scrub("test");
    }

    #[test]
    fn redacts_openai_key() {
        assert_eq!(scrub("sk-abcdefghijklmnopqrstuvwxyz123456"), "[REDACTED_SECRET]");
    }

    #[test]
    fn redacts_aws_access_key() {
        assert_eq!(scrub("AKIAIOSFODNN7EXAMPLE"), "[REDACTED_SECRET]");
    }

    #[test]
    fn redacts_bearer_token() {
        let header = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9";
        assert_eq!(scrub(header), "Authorization: Bearer [REDACTED_SECRET]");
    }

    #[test]
    fn redacts_api_key_assignment() {
        assert_eq!(scrub("api_key=sk-test12345678"), "api_key=[REDACTED_SECRET]");
    }

    #[test]
    fn redacts_password_assignment() {
        assert_eq!(scrub("password: mysecretvalue"), "password: [REDACTED_SECRET]");
    }

    #[test]
    fn redacts_token_in_quotes() {
        // The surrounding quotes must survive; only the value between them is replaced.
        let quoted = r#"token="abc123xyz789abcdef""#;
        assert_eq!(scrub(quoted), r#"token="[REDACTED_SECRET]""#);
    }

    #[test]
    fn preserves_short_values() {
        // Values under 8 characters should not be redacted
        assert_eq!(scrub("password: short"), "password: short");
    }

    #[test]
    fn redacts_multiple_secrets() {
        let output = scrub("Keys: sk-test12345678901234567890 and AKIAIOSFODNN7EXAMPLE");

        // The marker is present and neither original secret survives.
        assert!(output.contains("[REDACTED_SECRET]"));
        for leaked in ["AKIAIOSFODNN7EXAMPLE", "sk-test12345678901234567890"] {
            assert!(!output.contains(leaked));
        }
    }

    #[test]
    fn preserves_non_secret_text() {
        let plain = "Hello world, this is normal text";
        assert_eq!(scrub(plain), plain);
    }
}