Skip to main content

voirs_cli/telemetry/
privacy.rs

1//! Privacy controls for telemetry system
2
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5
6use super::events::{EventMetadata, TelemetryEvent};
7
/// Anonymization level for privacy
///
/// Each level enables a set of anonymization steps, exposed through the
/// predicate methods on this type:
///
/// | Level    | hash user IDs | sanitize paths | remove text | filter metadata |
/// |----------|---------------|----------------|-------------|-----------------|
/// | `None`   | no            | no             | no          | no              |
/// | `Low`    | yes           | no             | no          | no              |
/// | `Medium` | yes           | yes            | yes         | no              |
/// | `High`   | yes           | yes            | yes         | yes             |
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnonymizationLevel {
    /// No anonymization - full data collection (use with caution)
    None,

    /// Low anonymization - hash user IDs, keep most data
    Low,

    /// Medium anonymization - hash IDs, remove paths, generalize data
    Medium,

    /// High anonymization - maximum privacy, minimal data collection
    High,
}
23
24impl AnonymizationLevel {
25    /// Check if user IDs should be hashed
26    pub fn hash_user_ids(&self) -> bool {
27        !matches!(self, AnonymizationLevel::None)
28    }
29
30    /// Check if file paths should be sanitized
31    pub fn sanitize_paths(&self) -> bool {
32        matches!(self, AnonymizationLevel::Medium | AnonymizationLevel::High)
33    }
34
35    /// Check if metadata should be filtered
36    pub fn filter_metadata(&self) -> bool {
37        matches!(self, AnonymizationLevel::High)
38    }
39
40    /// Check if text content should be removed
41    pub fn remove_text_content(&self) -> bool {
42        matches!(self, AnonymizationLevel::Medium | AnonymizationLevel::High)
43    }
44}
45
46impl std::fmt::Display for AnonymizationLevel {
47    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48        match self {
49            AnonymizationLevel::None => write!(f, "none"),
50            AnonymizationLevel::Low => write!(f, "low"),
51            AnonymizationLevel::Medium => write!(f, "medium"),
52            AnonymizationLevel::High => write!(f, "high"),
53        }
54    }
55}
56
/// Privacy control for telemetry
///
/// Holds the configured [`AnonymizationLevel`] together with the salt that is
/// mixed into hashed identifiers.
pub struct PrivacyControl {
    /// Anonymization level that decides which steps are applied to an event.
    level: AnonymizationLevel,
    /// Salt appended to the input before SHA-256 hashing; set to a freshly
    /// generated UUID string when the control is created.
    salt: String,
}
62
63impl PrivacyControl {
64    /// Create a new privacy control with the specified level
65    pub fn new(level: AnonymizationLevel) -> Self {
66        Self {
67            level,
68            salt: uuid::Uuid::new_v4().to_string(),
69        }
70    }
71
72    /// Apply privacy controls to an event
73    pub fn anonymize_event(&self, mut event: TelemetryEvent) -> TelemetryEvent {
74        // Hash user ID if required
75        if self.level.hash_user_ids() {
76            if let Some(user_id) = event.user_id {
77                event.user_id = Some(self.hash_string(&user_id));
78            }
79        }
80
81        // Sanitize paths in metadata
82        if self.level.sanitize_paths() {
83            self.sanitize_metadata_paths(&mut event.metadata);
84        }
85
86        // Remove text content if required
87        if self.level.remove_text_content() {
88            self.remove_text_from_metadata(&mut event.metadata);
89        }
90
91        // Filter metadata if required
92        if self.level.filter_metadata() {
93            self.filter_sensitive_metadata(&mut event.metadata);
94        }
95
96        event
97    }
98
99    /// Hash a string using SHA-256 with salt
100    fn hash_string(&self, input: &str) -> String {
101        let mut hasher = Sha256::new();
102        hasher.update(input.as_bytes());
103        hasher.update(self.salt.as_bytes());
104        format!("{:x}", hasher.finalize())
105    }
106
107    /// Sanitize file paths in metadata
108    fn sanitize_metadata_paths(&self, metadata: &mut EventMetadata) {
109        let sensitive_keys = ["path", "file", "directory", "output", "input"];
110
111        for key in sensitive_keys {
112            if let Some(value) = metadata.get(key) {
113                let sanitized = self.sanitize_path(value);
114                metadata.set(key, sanitized);
115            }
116        }
117    }
118
119    /// Sanitize a file path by removing user-specific information
120    fn sanitize_path(&self, path: &str) -> String {
121        // Replace user home directory with placeholder
122        let mut sanitized = path.to_string();
123
124        if let Ok(home) = std::env::var("HOME") {
125            if !home.is_empty() {
126                sanitized = sanitized.replace(&home, "$HOME");
127            }
128        }
129
130        if let Ok(userprofile) = std::env::var("USERPROFILE") {
131            if !userprofile.is_empty() {
132                sanitized = sanitized.replace(&userprofile, "$HOME");
133            }
134        }
135
136        // Get just the filename if it's a full path
137        if let Some(filename) = std::path::Path::new(&sanitized)
138            .file_name()
139            .and_then(|s| s.to_str())
140        {
141            filename.to_string()
142        } else {
143            sanitized
144        }
145    }
146
147    /// Remove text content from metadata
148    fn remove_text_from_metadata(&self, metadata: &mut EventMetadata) {
149        let text_keys = ["text", "message", "content", "input_text"];
150
151        for key in text_keys {
152            if let Some(value) = metadata.get(key) {
153                // Replace with length information only
154                metadata.set(key, format!("<redacted {} chars>", value.len()));
155            }
156        }
157    }
158
159    /// Filter sensitive metadata
160    fn filter_sensitive_metadata(&self, metadata: &mut EventMetadata) {
161        let allowed_keys = [
162            "command",
163            "voice",
164            "duration_ms",
165            "success",
166            "error_type",
167            "severity",
168            "metric_name",
169            "value",
170            "unit",
171            "event_type",
172        ];
173
174        // Keep only allowed keys
175        let current_keys: Vec<String> = metadata.keys().cloned().collect();
176        for key in current_keys {
177            if !allowed_keys.contains(&key.as_str()) {
178                metadata.remove(&key);
179            }
180        }
181    }
182
183    /// Get current anonymization level
184    pub fn level(&self) -> AnonymizationLevel {
185        self.level
186    }
187
188    /// Check if data collection is allowed for a specific type
189    pub fn allows_data_type(&self, data_type: &str) -> bool {
190        match self.level {
191            AnonymizationLevel::None | AnonymizationLevel::Low => true,
192            AnonymizationLevel::Medium => {
193                // Allow most data types except personal info
194                !matches!(data_type, "text_content" | "file_path" | "user_name")
195            }
196            AnonymizationLevel::High => {
197                // Allow only essential metrics
198                matches!(
199                    data_type,
200                    "command" | "duration" | "error_type" | "performance"
201                )
202            }
203        }
204    }
205}
206
#[cfg(test)]
mod tests {
    use super::*;
    use crate::telemetry::events::EventType;

    /// Every level must enable exactly the anonymization steps assigned to it.
    #[test]
    fn test_anonymization_level_flags() {
        assert!(!AnonymizationLevel::None.hash_user_ids());
        assert!(AnonymizationLevel::Low.hash_user_ids());
        assert!(AnonymizationLevel::Medium.hash_user_ids());
        assert!(AnonymizationLevel::High.hash_user_ids());

        assert!(!AnonymizationLevel::None.sanitize_paths());
        assert!(!AnonymizationLevel::Low.sanitize_paths());
        assert!(AnonymizationLevel::Medium.sanitize_paths());
        assert!(AnonymizationLevel::High.sanitize_paths());

        // Text removal starts at Medium.
        assert!(!AnonymizationLevel::None.remove_text_content());
        assert!(!AnonymizationLevel::Low.remove_text_content());
        assert!(AnonymizationLevel::Medium.remove_text_content());
        assert!(AnonymizationLevel::High.remove_text_content());

        // Metadata filtering is reserved for the strictest level.
        assert!(!AnonymizationLevel::None.filter_metadata());
        assert!(!AnonymizationLevel::Low.filter_metadata());
        assert!(!AnonymizationLevel::Medium.filter_metadata());
        assert!(AnonymizationLevel::High.filter_metadata());
    }

    /// Hashing is deterministic within one control and yields a hex digest.
    #[test]
    fn test_hash_string() {
        let control = PrivacyControl::new(AnonymizationLevel::Medium);
        let hash1 = control.hash_string("test");
        let hash2 = control.hash_string("test");

        assert_eq!(hash1, hash2); // Same input, same hash
        assert_ne!(hash1, "test"); // Hash is different from input
        assert_eq!(hash1.len(), 64); // SHA-256 produces 64 hex characters
    }

    /// At `Low` the user ID is replaced by its salted hash.
    #[test]
    fn test_user_id_hashing() {
        let control = PrivacyControl::new(AnonymizationLevel::Low);
        // The event is consumed exactly once, so it needs neither `mut` nor a clone.
        let event =
            TelemetryEvent::new(EventType::CommandExecuted).with_user_id("user123".to_string());

        let anonymized = control.anonymize_event(event);
        assert_ne!(anonymized.user_id.as_ref().unwrap(), "user123");
        assert_eq!(anonymized.user_id.unwrap().len(), 64); // SHA-256 hash
    }

    /// A path under the home directory is reduced to its file name.
    #[test]
    fn test_path_sanitization() {
        let control = PrivacyControl::new(AnonymizationLevel::Medium);

        // Test with a path containing the actual HOME directory
        let home = std::env::var("HOME").unwrap_or_else(|_| "/home/user".to_string());
        let path = format!("{}/documents/file.txt", home);
        let sanitized = control.sanitize_path(&path);

        // Should extract just the filename
        assert_eq!(sanitized, "file.txt");
    }

    /// At `Medium` text values are replaced by a redaction marker.
    #[test]
    fn test_text_removal() {
        let control = PrivacyControl::new(AnonymizationLevel::Medium);
        let mut event = TelemetryEvent::new(EventType::SynthesisRequest);
        event.metadata.set("text", "Hello, this is sensitive text");

        let anonymized = control.anonymize_event(event);
        let text_value = anonymized.metadata.get("text").unwrap();

        assert!(text_value.contains("redacted"));
        assert!(!text_value.contains("Hello"));
    }

    /// At `High` only allow-listed metadata keys survive.
    #[test]
    fn test_metadata_filtering() {
        let control = PrivacyControl::new(AnonymizationLevel::High);
        let mut event = TelemetryEvent::new(EventType::CommandExecuted);
        event.metadata.set("command", "synthesize");
        event.metadata.set("user_name", "john_doe");
        event.metadata.set("duration_ms", "1500");

        let anonymized = control.anonymize_event(event);

        assert!(anonymized.metadata.contains("command"));
        assert!(anonymized.metadata.contains("duration_ms"));
        assert!(!anonymized.metadata.contains("user_name"));
    }

    /// Data-type permissions tighten as the level increases.
    #[test]
    fn test_allows_data_type() {
        let none_control = PrivacyControl::new(AnonymizationLevel::None);
        assert!(none_control.allows_data_type("text_content"));
        assert!(none_control.allows_data_type("file_path"));

        let medium_control = PrivacyControl::new(AnonymizationLevel::Medium);
        assert!(!medium_control.allows_data_type("text_content"));
        assert!(!medium_control.allows_data_type("file_path"));
        assert!(!medium_control.allows_data_type("user_name"));
        assert!(medium_control.allows_data_type("command"));

        let high_control = PrivacyControl::new(AnonymizationLevel::High);
        assert!(!high_control.allows_data_type("text_content"));
        assert!(high_control.allows_data_type("command"));
        assert!(high_control.allows_data_type("performance"));
    }

    /// `Display` renders the lowercase level name.
    #[test]
    fn test_anonymization_level_display() {
        assert_eq!(AnonymizationLevel::None.to_string(), "none");
        assert_eq!(AnonymizationLevel::Low.to_string(), "low");
        assert_eq!(AnonymizationLevel::Medium.to_string(), "medium");
        assert_eq!(AnonymizationLevel::High.to_string(), "high");
    }

    /// The accessor returns the level the control was created with.
    #[test]
    fn test_privacy_control_level() {
        let control = PrivacyControl::new(AnonymizationLevel::Medium);
        assert_eq!(control.level(), AnonymizationLevel::Medium);
    }
}