Skip to main content

aster/telemetry/
sanitizer.rs

1//! 敏感数据清洗
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5use serde_json::Value;
6
7/// 敏感数据正则模式
8static SENSITIVE_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
9    vec![
10        // Email
11        Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b").unwrap(),
12        // IP address
13        Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").unwrap(),
14        // API keys (Anthropic style)
15        Regex::new(r"\bsk-[a-zA-Z0-9]{32,}\b").unwrap(),
16        // Home paths (Unix)
17        Regex::new(r"/home/[a-zA-Z0-9_-]+").unwrap(),
18        // Home paths (Mac)
19        Regex::new(r"/Users/[a-zA-Z0-9_-]+").unwrap(),
20        // Home paths (Windows)
21        Regex::new(r"C:\\Users\\[a-zA-Z0-9_-]+").unwrap(),
22    ]
23});
24
/// Substrings that mark a field name as sensitive.
///
/// `is_sensitive_field` lowercases the field name and tests whether it
/// contains any of these entries, so every entry here is written in lowercase
/// and matches anywhere inside the name.
const SENSITIVE_FIELDS: &[&str] = &[
    "password",
    "secret",
    "token",
    // Because matching is by substring, this entry alone already covers names
    // such as "api_key" and "apikey".
    "key",
    "auth",
    "credential",
    "api_key",
    "apikey",
];
36
37/// 清洗字符串中的敏感数据
38pub fn sanitize_string(s: &str) -> String {
39    let mut result = s.to_string();
40    for pattern in SENSITIVE_PATTERNS.iter() {
41        result = pattern.replace_all(&result, "[REDACTED]").to_string();
42    }
43    result
44}
45
46/// 检查字段名是否敏感
47fn is_sensitive_field(key: &str) -> bool {
48    let lower = key.to_lowercase();
49    SENSITIVE_FIELDS.iter().any(|f| lower.contains(f))
50}
51
52/// 清洗 JSON 值中的敏感数据
53pub fn sanitize_value(value: &Value) -> Value {
54    match value {
55        Value::String(s) => Value::String(sanitize_string(s)),
56        Value::Array(arr) => Value::Array(arr.iter().map(sanitize_value).collect()),
57        Value::Object(obj) => {
58            let mut result = serde_json::Map::new();
59            for (key, val) in obj {
60                if is_sensitive_field(key) {
61                    result.insert(key.clone(), Value::String("[REDACTED]".to_string()));
62                } else {
63                    result.insert(key.clone(), sanitize_value(val));
64                }
65            }
66            Value::Object(result)
67        }
68        other => other.clone(),
69    }
70}
71
72/// 清洗 HashMap 中的敏感数据
73pub fn sanitize_map(
74    map: &std::collections::HashMap<String, Value>,
75) -> std::collections::HashMap<String, Value> {
76    map.iter()
77        .map(|(k, v)| {
78            if is_sensitive_field(k) {
79                (k.clone(), Value::String("[REDACTED]".to_string()))
80            } else {
81                (k.clone(), sanitize_value(v))
82            }
83        })
84        .collect()
85}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// An email address embedded in free text is replaced by the placeholder.
    #[test]
    fn test_sanitize_email() {
        let input = "Contact: user@example.com";
        let result = sanitize_string(input);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("user@example.com"));
    }

    /// An `sk-` API key is replaced, and the key text itself no longer appears.
    #[test]
    fn test_sanitize_api_key() {
        let key = "sk-abcdefghijklmnopqrstuvwxyz123456";
        let result = sanitize_string(&format!("Key: {}", key));
        assert!(result.contains("[REDACTED]"));
        // The placeholder alone is not enough: the secret must actually be gone.
        assert!(!result.contains(key));
    }

    /// A map entry with a sensitive key is redacted; other entries are kept.
    #[test]
    fn test_sanitize_sensitive_field() {
        let mut map = std::collections::HashMap::new();
        map.insert(
            "password".to_string(),
            Value::String("secret123".to_string()),
        );
        map.insert("name".to_string(), Value::String("John".to_string()));

        let result = sanitize_map(&map);
        assert_eq!(
            result.get("password"),
            Some(&Value::String("[REDACTED]".to_string()))
        );
        assert_eq!(result.get("name"), Some(&Value::String("John".to_string())));
    }

    /// Nested objects are sanitized recursively: sensitive keys are redacted,
    /// string values are scanned, and non-string scalars pass through.
    #[test]
    fn test_sanitize_nested_value() {
        let input = serde_json::json!({
            "user": {
                "api_key": "abc123",
                "note": "mail user@example.com"
            },
            "count": 3
        });

        let result = sanitize_value(&input);
        assert_eq!(
            result["user"]["api_key"],
            Value::String("[REDACTED]".to_string())
        );
        assert_eq!(
            result["user"]["note"],
            Value::String("mail [REDACTED]".to_string())
        );
        assert_eq!(result["count"], serde_json::json!(3));
    }
}