agents_core/
security.rs

1//! Security utilities for PII protection and data sanitization
2
3use regex::Regex;
4use serde_json::Value;
5use std::collections::HashSet;
6
/// Upper bound, in characters, used for message previews so that long
/// user-supplied text is not echoed in full.
pub const MAX_PREVIEW_LENGTH: usize = 100;

/// Key fragments that mark a JSON field as sensitive.
///
/// Matching is done on the lower-cased key with a substring test, so an
/// entry such as `"token"` also covers keys like `AccessToken`.
const SENSITIVE_FIELDS: &[&str] = &[
    // Passwords and generic secrets
    "password", "passwd", "pwd", "secret",
    // Tokens and API credentials
    "token", "api_key", "apikey", "access_token", "refresh_token", "auth_token",
    // HTTP authorization material
    "authorization", "bearer",
    // Payment and identity data
    "credit_card", "card_number", "cvv", "ssn", "social_security",
    // Cryptographic keys
    "private_key", "privatekey", "encryption_key",
];
33
34lazy_static::lazy_static! {
35    /// Regex patterns for detecting PII in text
36    static ref EMAIL_PATTERN: Regex = Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b").unwrap();
37    static ref PHONE_PATTERN: Regex = Regex::new(r"\b(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b").unwrap();
38    static ref CREDIT_CARD_PATTERN: Regex = Regex::new(r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b").unwrap();
39}
40
/// Shorten `text` to at most `max_length` characters, appending `"..."`
/// when anything was cut off.
///
/// Length is counted in Unicode scalar values (`char`s), not bytes, so the
/// cut never lands inside a multi-byte character. Text that already fits is
/// returned unchanged, without an ellipsis. A truncated result is
/// `max_length` characters followed by the three-character ellipsis.
///
/// # Examples
///
/// ```
/// use agents_core::security::truncate_string;
///
/// let short = "Hello";
/// assert_eq!(truncate_string(short, 100), "Hello");
///
/// let long = "a".repeat(150);
/// let truncated = truncate_string(&long, 100);
/// assert_eq!(truncated.len(), 103); // 100 chars + "..."
/// assert!(truncated.ends_with("..."));
/// ```
pub fn truncate_string(text: &str, max_length: usize) -> String {
    // The byte offset of the character at index `max_length` is exactly where
    // the kept prefix ends; if there is no such character the text fits.
    match text.char_indices().nth(max_length) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut shortened = String::with_capacity(cut + 3);
            shortened.push_str(&text[..cut]);
            shortened.push_str("...");
            shortened
        }
    }
}
63
64/// Sanitize a JSON value by redacting sensitive fields
65///
66/// This function recursively traverses a JSON structure and replaces
67/// values of sensitive fields with "[REDACTED]".
68///
69/// # Examples
70///
71/// ```
72/// use serde_json::json;
73/// use agents_core::security::sanitize_json;
74///
75/// let input = json!({
76///     "username": "john",
77///     "password": "secret123",
78///     "api_key": "sk-1234567890"
79/// });
80///
81/// let sanitized = sanitize_json(&input);
82/// assert_eq!(sanitized["username"], "john");
83/// assert_eq!(sanitized["password"], "[REDACTED]");
84/// assert_eq!(sanitized["api_key"], "[REDACTED]");
85/// ```
86pub fn sanitize_json(value: &Value) -> Value {
87    let sensitive_set: HashSet<&str> = SENSITIVE_FIELDS.iter().copied().collect();
88    sanitize_json_recursive(value, &sensitive_set)
89}
90
91fn sanitize_json_recursive(value: &Value, sensitive_fields: &HashSet<&str>) -> Value {
92    match value {
93        Value::Object(map) => {
94            let mut sanitized = serde_json::Map::new();
95            for (key, val) in map {
96                let key_lower = key.to_lowercase();
97                if sensitive_fields
98                    .iter()
99                    .any(|&field| key_lower.contains(field))
100                {
101                    sanitized.insert(key.clone(), Value::String("[REDACTED]".to_string()));
102                } else {
103                    sanitized.insert(key.clone(), sanitize_json_recursive(val, sensitive_fields));
104                }
105            }
106            Value::Object(sanitized)
107        }
108        Value::Array(arr) => Value::Array(
109            arr.iter()
110                .map(|v| sanitize_json_recursive(v, sensitive_fields))
111                .collect(),
112        ),
113        _ => value.clone(),
114    }
115}
116
117/// Redact PII patterns from text (emails, phone numbers, credit cards)
118///
119/// # Examples
120///
121/// ```
122/// use agents_core::security::redact_pii;
123///
124/// let text = "Contact me at john@example.com or call 555-123-4567";
125/// let redacted = redact_pii(text);
126/// assert!(redacted.contains("[EMAIL]"));
127/// assert!(redacted.contains("[PHONE]"));
128/// assert!(!redacted.contains("john@example.com"));
129/// assert!(!redacted.contains("555-123-4567"));
130/// ```
131pub fn redact_pii(text: &str) -> String {
132    let mut result = text.to_string();
133
134    // Redact emails
135    result = EMAIL_PATTERN.replace_all(&result, "[EMAIL]").to_string();
136
137    // Redact phone numbers
138    result = PHONE_PATTERN.replace_all(&result, "[PHONE]").to_string();
139
140    // Redact credit card numbers
141    result = CREDIT_CARD_PATTERN
142        .replace_all(&result, "[CARD]")
143        .to_string();
144
145    result
146}
147
148/// Create a safe preview of text by truncating and redacting PII
149///
150/// This combines truncation and PII redaction for maximum safety.
151///
152/// # Examples
153///
154/// ```
155/// use agents_core::security::safe_preview;
156///
157/// let text = "My email is john@example.com and here's a very long message that goes on and on...";
158/// let preview = safe_preview(text, 50);
159/// assert!(preview.len() <= 53); // 50 + "..."
160/// assert!(preview.contains("[EMAIL]"));
161/// ```
162pub fn safe_preview(text: &str, max_length: usize) -> String {
163    let redacted = redact_pii(text);
164    truncate_string(&redacted, max_length)
165}
166
167/// Sanitize tool payload for safe logging/broadcasting
168///
169/// This function:
170/// 1. Redacts sensitive fields from JSON
171/// 2. Truncates the result to prevent excessive data
172/// 3. Redacts any remaining PII patterns
173///
174/// # Examples
175///
176/// ```
177/// use serde_json::json;
178/// use agents_core::security::sanitize_tool_payload;
179///
180/// let payload = json!({
181///     "password": "secret123",
182///     "api_key": "sk-1234567890",
183///     "user": "john@example.com"
184/// });
185///
186/// let sanitized = sanitize_tool_payload(&payload, 100);
187/// assert!(sanitized.contains("[REDACTED]"));
188/// assert!(sanitized.contains("[EMAIL]"));
189/// assert!(sanitized.len() <= 103); // 100 + "..."
190/// ```
191pub fn sanitize_tool_payload(payload: &Value, max_length: usize) -> String {
192    let sanitized_json = sanitize_json(payload);
193    let json_str = sanitized_json.to_string();
194    safe_preview(&json_str, max_length)
195}
196
/// Unit tests for truncation, JSON field redaction, PII pattern redaction
/// and the combined preview/payload helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_truncate_string_short() {
        let text = "Hello, world!";
        assert_eq!(truncate_string(text, 100), "Hello, world!");
    }

    #[test]
    fn test_truncate_string_long() {
        let text = "a".repeat(150);
        let truncated = truncate_string(&text, 100);
        assert_eq!(truncated.len(), 103); // 100 + "..."
        assert!(truncated.ends_with("..."));
        assert_eq!(&truncated[..100], &text[..100]);
    }

    #[test]
    fn test_truncate_string_exact() {
        let text = "a".repeat(100);
        let truncated = truncate_string(&text, 100);
        assert_eq!(truncated.len(), 100);
        assert!(!truncated.ends_with("..."));
    }

    // Unicode Tests: Edge Cases
    #[test]
    fn test_truncate_string_empty() {
        let text = "";
        assert_eq!(truncate_string(text, 10), "");
        assert_eq!(truncate_string(text, 0), "");
    }

    #[test]
    fn test_truncate_string_composite_emoji() {
        // Family emoji: man, woman, girl and boy joined by zero-width joiners.
        // Spelled with escapes so the literal cannot be corrupted by a
        // re-encoding of the source file.
        // chars().count() = 7:
        // [U+1F468, U+200D, U+1F469, U+200D, U+1F467, U+200D, U+1F466]
        let family = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}";
        assert_eq!(family.chars().count(), 7);
        let result = truncate_string(family, 3);
        // Truncation is per `char`, so the ZWJ sequence is cut after the
        // second person, producing an incomplete emoji sequence
        assert_eq!(result.chars().count(), 6); // 3 chars + "..."
        assert!(result.starts_with("\u{1F468}\u{200D}\u{1F469}"));
    }

    #[test]
    fn test_sanitize_json_simple() {
        let input = json!({
            "username": "john",
            "password": "secret123"
        });

        let sanitized = sanitize_json(&input);
        assert_eq!(sanitized["username"], "john");
        assert_eq!(sanitized["password"], "[REDACTED]");
    }

    #[test]
    fn test_sanitize_json_nested() {
        let input = json!({
            "user": {
                "name": "john",
                "credentials": {
                    "password": "secret123",
                    "api_key": "sk-1234567890"
                }
            }
        });

        let sanitized = sanitize_json(&input);
        assert_eq!(sanitized["user"]["name"], "john");
        assert_eq!(sanitized["user"]["credentials"]["password"], "[REDACTED]");
        assert_eq!(sanitized["user"]["credentials"]["api_key"], "[REDACTED]");
    }

    #[test]
    fn test_sanitize_json_array() {
        let input = json!({
            "users": [
                {"name": "john", "password": "secret1"},
                {"name": "jane", "token": "abc123"}
            ]
        });

        let sanitized = sanitize_json(&input);
        assert_eq!(sanitized["users"][0]["name"], "john");
        assert_eq!(sanitized["users"][0]["password"], "[REDACTED]");
        assert_eq!(sanitized["users"][1]["name"], "jane");
        assert_eq!(sanitized["users"][1]["token"], "[REDACTED]");
    }

    #[test]
    fn test_sanitize_json_case_insensitive() {
        let input = json!({
            "Password": "secret123",
            "API_KEY": "sk-1234567890",
            "AccessToken": "token123"
        });

        let sanitized = sanitize_json(&input);
        assert_eq!(sanitized["Password"], "[REDACTED]");
        assert_eq!(sanitized["API_KEY"], "[REDACTED]");
        assert_eq!(sanitized["AccessToken"], "[REDACTED]");
    }

    #[test]
    fn test_redact_pii_email() {
        let text = "Contact me at john.doe@example.com for more info";
        let redacted = redact_pii(text);
        assert!(redacted.contains("[EMAIL]"));
        assert!(!redacted.contains("john.doe@example.com"));
    }

    #[test]
    fn test_redact_pii_phone() {
        let text = "Call me at 555-123-4567 or (555) 987-6543";
        let redacted = redact_pii(text);
        assert!(redacted.contains("[PHONE]"));
        assert!(!redacted.contains("555-123-4567"));
        assert!(!redacted.contains("555) 987-6543"));
    }

    #[test]
    fn test_redact_pii_credit_card() {
        let text = "Card number: 4532-1234-5678-9010";
        let redacted = redact_pii(text);
        assert!(redacted.contains("[CARD]"));
        assert!(!redacted.contains("4532-1234-5678-9010"));
    }

    #[test]
    fn test_redact_pii_multiple() {
        let text = "Email: john@example.com, Phone: 555-123-1234, Card: 4532123456789010";
        let redacted = redact_pii(text);
        assert!(redacted.contains("[EMAIL]"));
        assert!(redacted.contains("[PHONE]"));
        assert!(redacted.contains("[CARD]"));
    }

    #[test]
    fn test_safe_preview() {
        let text = "My email is john@example.com and here's a very long message that goes on and on and on and on and on and on";
        let preview = safe_preview(text, 50);

        // Should be truncated
        assert!(preview.len() <= 53); // 50 + "..."

        // Should have PII redacted
        assert!(preview.contains("[EMAIL]"));
        assert!(!preview.contains("john@example.com"));
    }

    #[test]
    fn test_sanitize_tool_payload() {
        let payload = json!({
            "password": "secret123",
            "api_key": "sk-1234567890",
            "user": "john@example.com"
        });

        let sanitized = sanitize_tool_payload(&payload, 100);

        // Should be truncated
        assert!(
            sanitized.len() <= 103,
            "Length should be <= 103, got: {}",
            sanitized.len()
        );

        // Password and api_key fields should be redacted
        assert!(
            sanitized.contains("[REDACTED]"),
            "Expected [REDACTED] in output, got: {}",
            sanitized
        );

        // Email should be redacted
        assert!(
            sanitized.contains("[EMAIL]"),
            "Expected [EMAIL] in output, got: {}",
            sanitized
        );
    }

    #[test]
    fn test_sanitize_tool_payload_long_message() {
        let payload = json!({
            "password": "secret123",
            "message": "a".repeat(200)
        });

        let sanitized = sanitize_tool_payload(&payload, 100);

        // Should be truncated
        assert!(sanitized.len() <= 103);

        // Even though truncated, password should still be redacted in the JSON structure
        // The order of fields in JSON is not guaranteed, but [REDACTED] should appear
        // if the password field comes before the truncation point
        assert!(sanitized.contains("[REDACTED]") || sanitized.ends_with("..."));
    }

    #[test]
    fn test_sanitize_tool_payload_no_sensitive_data() {
        let payload = json!({
            "action": "get_weather",
            "location": "Dubai"
        });

        let sanitized = sanitize_tool_payload(&payload, 100);
        assert!(sanitized.contains("get_weather"));
        assert!(sanitized.contains("Dubai"));
        assert!(!sanitized.contains("[REDACTED]"));
    }
}