Skip to main content

vtcode_commons/
sanitizer.rs

1//! Secret sanitization utilities for redacting sensitive information.
2//!
3//! Provides regex-based secret redaction for:
4//! - OpenAI API keys (`sk-...`)
5//! - AWS Access Key IDs (`AKIA...`)
6//! - Bearer tokens (`Bearer ...`)
7//! - Generic secret assignments (`api_key=...`, `password:...`, etc.)
8//!
9//! Use this module to sanitize text before logging, displaying in UI,
10//! or storing in session archives.
11
12use regex::Regex;
13use std::sync::LazyLock;
14
15/// OpenAI API key pattern: sk- followed by alphanumeric characters
16static OPENAI_KEY_REGEX: LazyLock<Regex> = LazyLock::new(|| compile_regex(r"sk-[A-Za-z0-9]{20,}"));
17
18/// AWS Access Key ID pattern: AKIA followed by 16 alphanumeric characters
19static AWS_ACCESS_KEY_ID_REGEX: LazyLock<Regex> =
20    LazyLock::new(|| compile_regex(r"\bAKIA[0-9A-Z]{16}\b"));
21
22/// Bearer token pattern: "Bearer " followed by token characters
23static BEARER_TOKEN_REGEX: LazyLock<Regex> =
24    LazyLock::new(|| compile_regex(r"(?i)\bBearer\s+[A-Za-z0-9.\-_]{16,}\b"));
25
26/// Generic secret assignment pattern: key=value or key: value format
27/// Matches common secret key names like api_key, token, secret, password
28static SECRET_ASSIGNMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    compile_regex(r#"(?i)\b(api[\-_]?key|token|secret|password)\b(\s*[:=]\s*)(["']?)[^\s"']{8,}"#)
30});
31
32/// Redact secrets and sensitive keys from a string.
33///
34/// This is a best-effort operation using well-known regex patterns.
35/// Redacted values are replaced with `[REDACTED_SECRET]`.
36///
37/// # Examples
38///
39/// ```
40/// use vtcode_commons::sanitizer::redact_secrets;
41///
42/// let input = "API key is sk-abc123xyz789".to_string();
43/// let output = redact_secrets(input);
44/// assert_eq!(output, "API key is [REDACTED_SECRET]");
45/// ```
46pub fn redact_secrets(input: String) -> String {
47    let redacted = OPENAI_KEY_REGEX.replace_all(&input, "[REDACTED_SECRET]");
48    let redacted = AWS_ACCESS_KEY_ID_REGEX.replace_all(&redacted, "[REDACTED_SECRET]");
49    let redacted = BEARER_TOKEN_REGEX.replace_all(&redacted, "Bearer [REDACTED_SECRET]");
50    let redacted = SECRET_ASSIGNMENT_REGEX.replace_all(&redacted, "$1$2$3[REDACTED_SECRET]");
51
52    redacted.to_string()
53}
54
55fn compile_regex(pattern: &str) -> Regex {
56    match Regex::new(pattern) {
57        Ok(regex) => regex,
58        // Panic is acceptable thanks to the `load_regex` test
59        Err(err) => panic!("invalid regex pattern `{pattern}`: {err}"),
60    }
61}
62
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `redact_secrets` on a borrowed string so each test reads as input -> output.
    fn redact(text: &str) -> String {
        redact_secrets(text.to_string())
    }

    #[test]
    fn load_regex() {
        // `redact_secrets` applies every pattern unconditionally, so one call forces all
        // of the lazily-compiled regexes to initialise; an invalid pattern panics here.
        let _ = redact("test");
    }

    #[test]
    fn redacts_openai_key() {
        assert_eq!(
            redact("sk-abcdefghijklmnopqrstuvwxyz123456"),
            "[REDACTED_SECRET]"
        );
    }

    #[test]
    fn redacts_aws_access_key() {
        assert_eq!(redact("AKIAIOSFODNN7EXAMPLE"), "[REDACTED_SECRET]");
    }

    #[test]
    fn redacts_bearer_token() {
        assert_eq!(
            redact("Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"),
            "Authorization: Bearer [REDACTED_SECRET]"
        );
    }

    #[test]
    fn redacts_api_key_assignment() {
        assert_eq!(redact("api_key=sk-test12345678"), "api_key=[REDACTED_SECRET]");
    }

    #[test]
    fn redacts_password_assignment() {
        assert_eq!(
            redact("password: mysecretvalue"),
            "password: [REDACTED_SECRET]"
        );
    }

    #[test]
    fn redacts_token_in_quotes() {
        assert_eq!(
            redact(r#"token="abc123xyz789abcdef""#),
            r#"token="[REDACTED_SECRET]""#
        );
    }

    #[test]
    fn preserves_short_values() {
        // Values under 8 characters should not be redacted
        assert_eq!(redact("password: short"), "password: short");
        // 7 characters is the longest value that is still left alone.
        assert_eq!(redact("password: 1234567"), "password: 1234567");
    }

    #[test]
    fn redacts_value_at_minimum_length() {
        // Exactly 8 characters is the shortest value the assignment pattern redacts.
        assert_eq!(redact("password: 12345678"), "password: [REDACTED_SECRET]");
    }

    #[test]
    fn redacts_multiple_secrets() {
        // Compare the whole output: this checks that both secrets are gone AND that the
        // surrounding text is left exactly as it was.
        assert_eq!(
            redact("Keys: sk-test12345678901234567890 and AKIAIOSFODNN7EXAMPLE"),
            "Keys: [REDACTED_SECRET] and [REDACTED_SECRET]"
        );
    }

    #[test]
    fn preserves_non_secret_text() {
        assert_eq!(
            redact("Hello world, this is normal text"),
            "Hello world, this is normal text"
        );
    }
}