Skip to main content

kimi_wire/protocol/
redact.rs

1use serde_json::Value;
2use std::borrow::Cow;
3use std::sync::LazyLock;
4
5const REDACTED_SECRET: &str = "[REDACTED]";
6
7/// Best-effort value-shape redaction patterns.
8///
9/// Key-based redaction in [`is_sensitive_key`] is the primary defense.
10/// The patterns below are defense in depth for values that leak through
11/// nested or unstructured fields.
12static SECRET_VALUE_PATTERNS: LazyLock<Vec<regex::Regex>> = LazyLock::new(|| {
13    let patterns: &[&str] = &[
14        // GitHub personal-access / OAuth / refresh tokens.
15        r"\bgh[pousr]_[A-Za-z0-9]{20,}\b",
16        // AWS access key id.
17        r"\bAKIA[0-9A-Z]{16}\b",
18        // Slack bot/user/app/refresh tokens.
19        r"\bxox[abprs]-[A-Za-z0-9-]{10,}\b",
20        // Stripe live/test secret keys.
21        r"\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b",
22        // Generic Bearer-token-shaped fragments.
23        r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]{20,}\b",
24        // PEM private key block markers.
25        r"-----BEGIN [A-Z ]*PRIVATE KEY-----",
26    ];
27    patterns
28        .iter()
29        .filter_map(|&p| regex::Regex::new(p).ok())
30        .collect()
31});
32
33/// Scrub known secret patterns from a string.
34pub fn scrub_secret_patterns(input: &str) -> Cow<'_, str> {
35    let mut current: Cow<'_, str> = Cow::Borrowed(input);
36    for re in SECRET_VALUE_PATTERNS.iter() {
37        match re.replace_all(current.as_ref(), REDACTED_SECRET) {
38            Cow::Borrowed(_) => {}
39            Cow::Owned(new) => current = Cow::Owned(new),
40        }
41    }
42    current
43}
44
45/// Redact sensitive fields in JSON payloads while preserving structure.
46pub fn redact_secrets(value: &Value) -> Value {
47    match value {
48        Value::Object(map) => {
49            let mut redacted = serde_json::Map::with_capacity(map.len());
50            for (key, entry) in map {
51                if is_sensitive_key(key) {
52                    redacted.insert(key.clone(), Value::String(REDACTED_SECRET.to_string()));
53                } else {
54                    redacted.insert(key.clone(), redact_secrets(entry));
55                }
56            }
57            Value::Object(redacted)
58        }
59        Value::Array(items) => Value::Array(items.iter().map(redact_secrets).collect()),
60        Value::String(s) => match scrub_secret_patterns(s) {
61            Cow::Borrowed(_) => value.clone(),
62            Cow::Owned(scrubbed) => Value::String(scrubbed),
63        },
64        _ => value.clone(),
65    }
66}
67
/// Reports whether a JSON object key names a field whose value must be
/// redacted wholesale.
///
/// Matching is ASCII-case-insensitive. A key is sensitive when it
///
/// * equals one of the well-known credential names (`api_key`, `api-key`,
///   `apikey`, `token`, `password`, `secret`),
/// * ends with one of those names joined by `_` or `-` (for example
///   `access_token`, `client-secret`, `db_password`, `x-api-key`), or
/// * contains `authorization` anywhere (for example `proxy-authorization`).
///
/// Only suffixes are checked, so keys such as `token_usage` that merely start
/// with a credential word are left alone.
fn is_sensitive_key(key: &str) -> bool {
    // Whole-key matches, in both `_` and `-` spellings where one exists.
    const EXACT: &[&str] = &["api_key", "api-key", "apikey", "token", "password", "secret"];
    // Separator-prefixed suffixes; the separator keeps `tokens`, `passwords`
    // and similar plain words from matching.
    const SUFFIXES: &[&str] = &[
        "_token",
        "-token",
        "_secret",
        "-secret",
        "_password",
        "-password",
        "_api_key",
        "-api-key",
    ];

    let lower = key.to_ascii_lowercase();
    EXACT.contains(&lower.as_str())
        || SUFFIXES.iter().any(|&suffix| lower.ends_with(suffix))
        || lower.contains("authorization")
}
79
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// GitHub-token-shaped fixture, assembled from fragments at runtime
    /// (presumably so the full literal never appears in the source).
    fn sample_github_pat() -> String {
        ["ghp", "_", "abcdefghijklmnop1234567890abcdef0011"].concat()
    }

    /// Sensitive keys are masked at every nesting depth, including inside
    /// arrays, while look-alike keys and plain values are left untouched.
    #[test]
    fn test_redact_recursive() {
        let input = json!({
            "api_key": "abc123",
            "nested": {
                "token": "tok123",
                "headers": { "authorization": "Bearer abc" },
                "token_usage": 42
            },
            "items": [{"password": "pass1"}, {"safe": "value"}]
        });

        let output = redact_secrets(&input);
        let nested = &output["nested"];
        let items = &output["items"];

        assert_eq!(output["api_key"], REDACTED_SECRET);
        assert_eq!(nested["token"], REDACTED_SECRET);
        assert_eq!(nested["headers"]["authorization"], REDACTED_SECRET);
        assert_eq!(nested["token_usage"], 42);
        assert_eq!(items[0]["password"], REDACTED_SECRET);
        assert_eq!(items[1]["safe"], "value");
    }

    /// Each value-shape pattern scrubs its secret out of free-form text and
    /// leaves the surrounding words in place.
    #[test]
    fn test_redact_value_patterns() {
        let github_pat = sample_github_pat();
        let aws_key = ["AKIA", "ABCDEFGHIJKLMNOP"].concat();
        let slack_token = ["xoxb", "-", "1234567890-abcdefghij1"].concat();
        let stripe_key = ["sk_live", "_", "abcdefghij1234567890ABCD"].concat();
        let bearer = ["Bearer", " ", "abcdef0123456789abcdef0123456789"].concat();
        let pem = "-----BEGIN RSA PRIVATE KEY-----".to_string();

        // (text containing a secret, expected text after redaction)
        let cases = [
            (
                format!("leaked github pat {github_pat} in env"),
                "leaked github pat [REDACTED] in env",
            ),
            (
                format!("old aws key was {aws_key} and is rotated"),
                "old aws key was [REDACTED] and is rotated",
            ),
            (
                format!("slack hook {slack_token} expired"),
                "slack hook [REDACTED] expired",
            ),
            (
                format!("stripe payload {stripe_key} used in tests"),
                "stripe payload [REDACTED] used in tests",
            ),
            (
                format!("header value: Authorization: {bearer}"),
                "header value: Authorization: [REDACTED]",
            ),
            (
                format!("pem block {pem} payload"),
                "pem block [REDACTED] payload",
            ),
        ];

        let lines: Vec<String> = cases.iter().map(|(text, _)| text.clone()).collect();
        let output = redact_secrets(&json!({ "transcript": lines }));
        let transcript = output["transcript"].as_array().unwrap();

        assert_eq!(transcript.len(), cases.len());
        for (actual, (_, expected)) in transcript.iter().zip(cases.iter()) {
            assert_eq!(*actual, *expected);
        }
    }

    /// Text that merely talks about tokens or keys must pass through verbatim.
    #[test]
    fn test_redact_preserves_benign_strings() {
        let summary = "rotated the github token quarterly";
        let url = "https://docs.example.com/auth/api-key.html";
        let code = "let token = std::env::var(\"GITHUB_TOKEN\");";

        let output = redact_secrets(&json!({
            "summary": summary,
            "url": url,
            "code": code
        }));

        for (field, original) in [("summary", summary), ("url", url), ("code", code)] {
            assert_eq!(output[field], original);
        }
    }

    /// Invalid patterns are silently skipped by `filter_map` when the list is
    /// built, so pinning the count guarantees no defense has been lost.
    #[test]
    fn test_all_secret_patterns_compile() {
        assert_eq!(SECRET_VALUE_PATTERNS.len(), 6);
    }

    /// Redacting an already-redacted payload changes nothing.
    #[test]
    fn test_redact_idempotent() {
        let first_pass = redact_secrets(&json!({ "msg": sample_github_pat() }));
        let second_pass = redact_secrets(&first_pass);

        assert_eq!(first_pass, second_pass);
        assert_eq!(first_pass["msg"], REDACTED_SECRET);
    }
}