1use crate::telemetry::{
6 FeedbackEvent, PrivacyConfig, QueryEvent, TelemetryError, TelemetryResult, TraceEvent,
7};
8use regex::Regex;
10use sha2::{Digest, Sha256};
11use std::collections::HashSet;
12
/// Sanitizes telemetry events and free text according to a [`PrivacyConfig`].
///
/// Bundles the configuration with the two lookup tables the sanitizing methods
/// consult: the PII redaction rules and the sensitive-keyword set.
pub struct PrivacyFilter {
    /// Switches and parameters that enable or tune each privacy feature.
    config: PrivacyConfig,
    /// Redaction rules; `strip_pii` applies them one after another, in list order.
    pii_patterns: Vec<PiiPattern>,
    /// Lowercase keywords that `contains_sensitive` looks for as substrings.
    sensitive_keywords: HashSet<String>,
}
22
/// One redaction rule: every match of `regex` is replaced by `replacement`.
struct PiiPattern {
    // The label is never read by the filter itself (hence the allow); it only
    // identifies the rule for someone reading or debugging the pattern list.
    #[allow(dead_code)]
    /// Short identifier of the kind of data this rule targets (e.g. "email").
    name: &'static str,
    /// Compiled pattern that locates the data to redact.
    regex: Regex,
    /// Placeholder text substituted for each match (e.g. "[EMAIL]").
    replacement: &'static str,
}
33
34impl PrivacyFilter {
35 pub fn new(config: PrivacyConfig) -> Self {
37 let pii_patterns = Self::build_pii_patterns();
38 let sensitive_keywords = Self::build_sensitive_keywords();
39
40 Self {
41 config,
42 pii_patterns,
43 sensitive_keywords,
44 }
45 }
46
47 fn build_pii_patterns() -> Vec<PiiPattern> {
50 use once_cell::sync::Lazy;
51
52 static EMAIL_RE: Lazy<Regex> =
54 Lazy::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
55 static PHONE_RE: Lazy<Regex> = Lazy::new(|| {
56 Regex::new(r"(\+?1?[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}").unwrap()
57 });
58 static SSN_RE: Lazy<Regex> =
59 Lazy::new(|| Regex::new(r"\b\d{3}[-.]?\d{2}[-.]?\d{4}\b").unwrap());
60 static CARD_RE: Lazy<Regex> =
61 Lazy::new(|| Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b").unwrap());
62 static IP_RE: Lazy<Regex> =
63 Lazy::new(|| Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").unwrap());
64 static API_KEY_RE: Lazy<Regex> = Lazy::new(|| {
65 Regex::new(r#"(?i)(api[_-]?key|apikey|secret[_-]?key|auth[_-]?token|bearer)\s*[:=]\s*['"]?[\w-]{20,}['"]?"#).unwrap()
66 });
67 static AWS_KEY_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)AKIA[0-9A-Z]{16}").unwrap());
68 static AUTH_URL_RE: Lazy<Regex> =
69 Lazy::new(|| Regex::new(r#"https?://[^:]+:[^@]+@[^\s]+"#).unwrap());
70 static USER_PATH_RE: Lazy<Regex> =
71 Lazy::new(|| Regex::new(r"(?i)(/home/|/users/|C:\\Users\\)[a-zA-Z0-9._-]+").unwrap());
72
73 vec![
74 PiiPattern {
76 name: "email",
77 regex: EMAIL_RE.clone(),
78 replacement: "[EMAIL]",
79 },
80 PiiPattern {
82 name: "phone",
83 regex: PHONE_RE.clone(),
84 replacement: "[PHONE]",
85 },
86 PiiPattern {
88 name: "ssn",
89 regex: SSN_RE.clone(),
90 replacement: "[SSN]",
91 },
92 PiiPattern {
94 name: "credit_card",
95 regex: CARD_RE.clone(),
96 replacement: "[CARD]",
97 },
98 PiiPattern {
100 name: "ip_address",
101 regex: IP_RE.clone(),
102 replacement: "[IP]",
103 },
104 PiiPattern {
106 name: "api_key",
107 regex: API_KEY_RE.clone(),
108 replacement: "[API_KEY]",
109 },
110 PiiPattern {
112 name: "aws_key",
113 regex: AWS_KEY_RE.clone(),
114 replacement: "[AWS_KEY]",
115 },
116 PiiPattern {
118 name: "auth_url",
119 regex: AUTH_URL_RE.clone(),
120 replacement: "[AUTH_URL]",
121 },
122 PiiPattern {
124 name: "user_path",
125 regex: USER_PATH_RE.clone(),
126 replacement: "[USER_PATH]",
127 },
128 ]
129 }
130
/// Builds the set of keywords that mark a text as sensitive.
///
/// Entries are stored lowercased so they can be compared against lowercased
/// input.
fn build_sensitive_keywords() -> HashSet<String> {
    const KEYWORDS: [&str; 10] = [
        "password",
        "passwd",
        "secret",
        "token",
        "credential",
        "private",
        "confidential",
        "sensitive",
        "ssn",
        "social",
    ];

    let mut keywords = HashSet::with_capacity(KEYWORDS.len());
    for word in KEYWORDS.iter() {
        keywords.insert(word.to_lowercase());
    }
    keywords
}
149
150 pub fn strip_pii(&self, text: &str) -> String {
152 if !self.config.strip_pii {
153 return text.to_string();
154 }
155
156 let mut result = text.to_string();
157
158 for pattern in &self.pii_patterns {
159 result = pattern
160 .regex
161 .replace_all(&result, pattern.replacement)
162 .to_string();
163 }
164
165 result
166 }
167
168 pub fn hash_query(&self, query: &str) -> String {
170 let normalized = query
172 .to_lowercase()
173 .split_whitespace()
174 .collect::<Vec<_>>()
175 .join(" ");
176
177 let mut hasher = Sha256::new();
178 hasher.update(normalized.as_bytes());
179 format!("{:x}", hasher.finalize())
180 }
181
182 pub fn contains_sensitive(&self, text: &str) -> bool {
184 let lower = text.to_lowercase();
185 self.sensitive_keywords.iter().any(|kw| lower.contains(kw))
186 }
187
188 pub fn sanitize_query_event(&self, mut event: QueryEvent) -> TelemetryResult<QueryEvent> {
190 if self.config.block_sensitive && self.contains_sensitive(&event.query_text) {
192 return Err(TelemetryError::PrivacyViolation(
193 "Query contains sensitive keywords".to_string(),
194 ));
195 }
196
197 let _query_hash = self.hash_query(&event.query_text);
199 event.query_text = "[HASHED]".to_string(); event.tools_used = event
203 .tools_used
204 .into_iter()
205 .map(|t| self.strip_pii(&t))
206 .collect();
207
208 Ok(event)
209 }
210
/// Sanitizes a feedback event.
///
/// Currently a pass-through: the event is returned exactly as received and this
/// function never produces an error.
///
/// NOTE(review): if `FeedbackEvent` carries free-form text, it presumably
/// should be run through `strip_pii` here — confirm against the type definition.
pub fn sanitize_feedback_event(&self, event: FeedbackEvent) -> TelemetryResult<FeedbackEvent> {
    Ok(event)
}
216
217 pub fn sanitize_trace_event(&self, mut event: TraceEvent) -> TelemetryResult<TraceEvent> {
219 event.step_types = event
221 .step_types
222 .into_iter()
223 .map(|s| self.strip_pii(&s))
224 .collect();
225
226 Ok(event)
227 }
228
229 pub fn add_dp_noise(&self, count: u64) -> u64 {
231 if !self.config.differential_privacy {
232 return count;
233 }
234
235 let epsilon = self.config.dp_epsilon;
237 let sensitivity = 1.0; let scale = sensitivity / epsilon;
239
240 let noise = scale * 0.5; (count as f64 + noise).max(0.0).round() as u64
245 }
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    // Configuration with every privacy feature switched on and epsilon = 1.0.
    fn test_config() -> PrivacyConfig {
        PrivacyConfig {
            strip_pii: true,
            block_sensitive: true,
            differential_privacy: true,
            dp_epsilon: 1.0,
            redact_file_paths: true,
        }
    }

    // The filter under test, built from `test_config`.
    fn strict_filter() -> PrivacyFilter {
        PrivacyFilter::new(test_config())
    }

    #[test]
    fn test_email_stripping() {
        let redacted = strict_filter().strip_pii("Contact me at user@example.com for details");
        assert_eq!(redacted, "Contact me at [EMAIL] for details");
    }

    #[test]
    fn test_phone_stripping() {
        let redacted = strict_filter().strip_pii("Call me at 555-123-4567");
        assert!(
            redacted.contains("[PHONE]"),
            "Expected [PHONE] in: {}",
            redacted
        );
        assert!(!redacted.contains("555"), "Phone number should be redacted");
    }

    #[test]
    fn test_api_key_stripping() {
        let redacted = strict_filter().strip_pii("Set api_key=sk-abcdefghijklmnopqrstuvwxyz");
        assert!(
            redacted.contains("[API_KEY]"),
            "Expected [API_KEY] in: {}",
            redacted
        );
    }

    #[test]
    fn test_query_hashing() {
        let filter = strict_filter();

        // Same input twice, then a different input.
        let first = filter.hash_query("what is chain of thought");
        let repeat = filter.hash_query("what is chain of thought");
        let other = filter.hash_query("different query");

        assert_eq!(first, repeat);
        assert_ne!(first, other);
    }

    #[test]
    fn test_sensitive_detection() {
        let filter = strict_filter();

        for flagged in ["my password is abc123", "This is CONFIDENTIAL"].iter() {
            assert!(filter.contains_sensitive(flagged));
        }
        assert!(!filter.contains_sensitive("This is a normal query"));
    }

    #[test]
    fn test_sensitive_blocking() {
        let event = QueryEvent::new(uuid::Uuid::new_v4(), "my password is abc123".to_string());

        let outcome = strict_filter().sanitize_query_event(event);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_user_path_stripping() {
        let redacted = strict_filter().strip_pii("File at /home/johndoe/secrets.txt");
        assert!(redacted.contains("[USER_PATH]"));
        assert!(!redacted.contains("johndoe"));
    }
}