1use regex::Regex;
4use serde_json::Value;
5use std::collections::HashSet;
6
/// Preview length limit with the value 100.
///
/// NOTE(review): no function visible in this file reads this constant; presumably
/// callers pass it as the `max_length` argument of the preview helpers — confirm.
pub const MAX_PREVIEW_LENGTH: usize = 100;
9
/// Lower-case markers for JSON object keys whose values must be redacted.
///
/// `sanitize_json` lower-cases each key and checks whether it *contains* any of
/// these markers, so a short entry such as `token` also covers keys like
/// `AccessToken` or `refresh_token`.
const SENSITIVE_FIELDS: &[&str] = &[
    "password",
    "passwd",
    "pwd",
    "secret",
    "token",
    "api_key",
    "apikey",
    "access_token",
    "refresh_token",
    "auth_token",
    "authorization",
    "bearer",
    "credit_card",
    "card_number",
    "cvv",
    "ssn",
    "social_security",
    "private_key",
    "privatekey",
    "encryption_key",
];
33
34lazy_static::lazy_static! {
35 static ref EMAIL_PATTERN: Regex = Regex::new(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b").unwrap();
37 static ref PHONE_PATTERN: Regex = Regex::new(r"\b(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b").unwrap();
38 static ref CREDIT_CARD_PATTERN: Regex = Regex::new(r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b").unwrap();
39}
40
/// Shortens `text` to at most `max_length` characters, marking any cut with `"..."`.
///
/// Length is counted in `char`s (Unicode scalar values), never bytes, so the cut
/// cannot land inside a multi-byte character. Multi-`char` sequences such as
/// ZWJ emoji can still be split. Text that already fits is returned unchanged;
/// otherwise the result is the first `max_length` characters followed by the
/// three-character ellipsis.
pub fn truncate_string(text: &str, max_length: usize) -> String {
    // The byte offset of the character at index `max_length`, when one exists,
    // is exactly where the retained prefix ends.
    match text.char_indices().nth(max_length) {
        Some((cut, _)) => {
            let mut shortened = String::with_capacity(cut + 3);
            shortened.push_str(&text[..cut]);
            shortened.push_str("...");
            shortened
        }
        None => text.to_owned(),
    }
}
63
64pub fn sanitize_json(value: &Value) -> Value {
87 let sensitive_set: HashSet<&str> = SENSITIVE_FIELDS.iter().copied().collect();
88 sanitize_json_recursive(value, &sensitive_set)
89}
90
91fn sanitize_json_recursive(value: &Value, sensitive_fields: &HashSet<&str>) -> Value {
92 match value {
93 Value::Object(map) => {
94 let mut sanitized = serde_json::Map::new();
95 for (key, val) in map {
96 let key_lower = key.to_lowercase();
97 if sensitive_fields
98 .iter()
99 .any(|&field| key_lower.contains(field))
100 {
101 sanitized.insert(key.clone(), Value::String("[REDACTED]".to_string()));
102 } else {
103 sanitized.insert(key.clone(), sanitize_json_recursive(val, sensitive_fields));
104 }
105 }
106 Value::Object(sanitized)
107 }
108 Value::Array(arr) => Value::Array(
109 arr.iter()
110 .map(|v| sanitize_json_recursive(v, sensitive_fields))
111 .collect(),
112 ),
113 _ => value.clone(),
114 }
115}
116
117pub fn redact_pii(text: &str) -> String {
132 let mut result = text.to_string();
133
134 result = EMAIL_PATTERN.replace_all(&result, "[EMAIL]").to_string();
136
137 result = PHONE_PATTERN.replace_all(&result, "[PHONE]").to_string();
139
140 result = CREDIT_CARD_PATTERN
142 .replace_all(&result, "[CARD]")
143 .to_string();
144
145 result
146}
147
148pub fn safe_preview(text: &str, max_length: usize) -> String {
163 let redacted = redact_pii(text);
164 truncate_string(&redacted, max_length)
165}
166
167pub fn sanitize_tool_payload(payload: &Value, max_length: usize) -> String {
192 let sanitized_json = sanitize_json(payload);
193 let json_str = sanitized_json.to_string();
194 safe_preview(&json_str, max_length)
195}
196
/// Unit tests for truncation, JSON key redaction, PII redaction and the
/// combined payload preview.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_truncate_string_short() {
        let text = "Hello, world!";
        assert_eq!(truncate_string(text, 100), "Hello, world!");
    }

    #[test]
    fn test_truncate_string_long() {
        let text = "a".repeat(150);
        let truncated = truncate_string(&text, 100);
        // 100 kept characters plus the three-character ellipsis.
        assert_eq!(truncated.len(), 103);
        assert!(truncated.ends_with("..."));
        assert_eq!(&truncated[..100], &text[..100]);
    }

    #[test]
    fn test_truncate_string_exact() {
        let text = "a".repeat(100);
        let truncated = truncate_string(&text, 100);
        assert_eq!(truncated.len(), 100);
        assert!(!truncated.ends_with("..."));
    }

    #[test]
    fn test_truncate_string_empty() {
        let text = "";
        assert_eq!(truncate_string(text, 10), "");
        assert_eq!(truncate_string(text, 0), "");
    }

    #[test]
    fn test_truncate_string_composite_emoji() {
        // Family emoji: man, ZWJ, woman, ZWJ, girl, ZWJ, boy (7 chars).
        // Written with escapes so the literal survives any re-encoding of this file.
        let family = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}";
        let result = truncate_string(family, 3);
        // Truncation counts chars, so the sequence is cut after "man ZWJ woman"
        // and the ellipsis brings the total to 6 chars.
        assert_eq!(result.chars().count(), 6);
        assert!(result.starts_with("\u{1F468}\u{200D}\u{1F469}"));
    }

    #[test]
    fn test_sanitize_json_simple() {
        let input = json!({
            "username": "john",
            "password": "secret123"
        });

        let sanitized = sanitize_json(&input);
        assert_eq!(sanitized["username"], "john");
        assert_eq!(sanitized["password"], "[REDACTED]");
    }

    #[test]
    fn test_sanitize_json_nested() {
        let input = json!({
            "user": {
                "name": "john",
                "credentials": {
                    "password": "secret123",
                    "api_key": "sk-1234567890"
                }
            }
        });

        let sanitized = sanitize_json(&input);
        assert_eq!(sanitized["user"]["name"], "john");
        assert_eq!(sanitized["user"]["credentials"]["password"], "[REDACTED]");
        assert_eq!(sanitized["user"]["credentials"]["api_key"], "[REDACTED]");
    }

    #[test]
    fn test_sanitize_json_array() {
        let input = json!({
            "users": [
                {"name": "john", "password": "secret1"},
                {"name": "jane", "token": "abc123"}
            ]
        });

        let sanitized = sanitize_json(&input);
        assert_eq!(sanitized["users"][0]["name"], "john");
        assert_eq!(sanitized["users"][0]["password"], "[REDACTED]");
        assert_eq!(sanitized["users"][1]["name"], "jane");
        assert_eq!(sanitized["users"][1]["token"], "[REDACTED]");
    }

    #[test]
    fn test_sanitize_json_case_insensitive() {
        let input = json!({
            "Password": "secret123",
            "API_KEY": "sk-1234567890",
            "AccessToken": "token123"
        });

        let sanitized = sanitize_json(&input);
        assert_eq!(sanitized["Password"], "[REDACTED]");
        assert_eq!(sanitized["API_KEY"], "[REDACTED]");
        assert_eq!(sanitized["AccessToken"], "[REDACTED]");
    }

    #[test]
    fn test_redact_pii_email() {
        let text = "Contact me at john.doe@example.com for more info";
        let redacted = redact_pii(text);
        assert!(redacted.contains("[EMAIL]"));
        assert!(!redacted.contains("john.doe@example.com"));
    }

    #[test]
    fn test_redact_pii_phone() {
        let text = "Call me at 555-123-4567 or (555) 987-6543";
        let redacted = redact_pii(text);
        assert!(redacted.contains("[PHONE]"));
        assert!(!redacted.contains("555-123-4567"));
        assert!(!redacted.contains("555) 987-6543"));
    }

    #[test]
    fn test_redact_pii_credit_card() {
        let text = "Card number: 4532-1234-5678-9010";
        let redacted = redact_pii(text);
        assert!(redacted.contains("[CARD]"));
        assert!(!redacted.contains("4532-1234-5678-9010"));
    }

    #[test]
    fn test_redact_pii_multiple() {
        let text = "Email: john@example.com, Phone: 555-123-1234, Card: 4532123456789010";
        let redacted = redact_pii(text);
        assert!(redacted.contains("[EMAIL]"));
        assert!(redacted.contains("[PHONE]"));
        assert!(redacted.contains("[CARD]"));
    }

    #[test]
    fn test_safe_preview() {
        let text = "My email is john@example.com and here's a very long message that goes on and on and on and on and on and on";
        let preview = safe_preview(text, 50);

        // 50 kept characters plus the ellipsis.
        assert!(preview.len() <= 53);
        assert!(preview.contains("[EMAIL]"));
        assert!(!preview.contains("john@example.com"));
    }

    #[test]
    fn test_sanitize_tool_payload() {
        let payload = json!({
            "password": "secret123",
            "api_key": "sk-1234567890",
            "user": "john@example.com"
        });

        let sanitized = sanitize_tool_payload(&payload, 100);

        assert!(
            sanitized.len() <= 103,
            "Length should be <= 103, got: {}",
            sanitized.len()
        );

        assert!(
            sanitized.contains("[REDACTED]"),
            "Expected [REDACTED] in output, got: {}",
            sanitized
        );

        assert!(
            sanitized.contains("[EMAIL]"),
            "Expected [EMAIL] in output, got: {}",
            sanitized
        );
    }

    #[test]
    fn test_sanitize_tool_payload_long_message() {
        let payload = json!({
            "password": "secret123",
            "message": "a".repeat(200)
        });

        let sanitized = sanitize_tool_payload(&payload, 100);

        assert!(sanitized.len() <= 103);

        // The serialized payload is longer than 100 characters, so it is always
        // truncated. Whether "[REDACTED]" survives the cut depends on key order,
        // but the raw secret must never appear.
        assert!(sanitized.ends_with("..."));
        assert!(!sanitized.contains("secret123"));
    }

    #[test]
    fn test_sanitize_tool_payload_no_sensitive_data() {
        let payload = json!({
            "action": "get_weather",
            "location": "Dubai"
        });

        let sanitized = sanitize_tool_payload(&payload, 100);
        assert!(sanitized.contains("get_weather"));
        assert!(sanitized.contains("Dubai"));
        assert!(!sanitized.contains("[REDACTED]"));
    }
}