Skip to main content

a3s_common/
privacy.rs

1//! Privacy classification and data protection
2
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::cmp::Ordering;
6use thiserror::Error;
7
8#[derive(Debug, Error)]
9pub enum PrivacyError {
10    #[error("Invalid regex pattern: {0}")]
11    InvalidPattern(String),
12    #[error("Classification error: {0}")]
13    Classification(String),
14}
15
16/// Sensitivity level for classified data
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
18pub enum SensitivityLevel {
19    Public,
20    Normal,
21    Sensitive,
22    HighlySensitive,
23    Critical,
24}
25
26impl std::fmt::Display for SensitivityLevel {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match self {
29            Self::Public => write!(f, "Public"),
30            Self::Normal => write!(f, "Normal"),
31            Self::Sensitive => write!(f, "Sensitive"),
32            Self::HighlySensitive => write!(f, "HighlySensitive"),
33            Self::Critical => write!(f, "Critical"),
34        }
35    }
36}
37
38impl Default for SensitivityLevel {
39    fn default() -> Self {
40        Self::Normal
41    }
42}
43
44impl PartialOrd for SensitivityLevel {
45    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
46        Some(self.cmp(other))
47    }
48}
49
50impl Ord for SensitivityLevel {
51    fn cmp(&self, other: &Self) -> Ordering {
52        let self_level = match self {
53            Self::Public => 0,
54            Self::Normal => 1,
55            Self::Sensitive => 2,
56            Self::HighlySensitive => 3,
57            Self::Critical => 4,
58        };
59        let other_level = match other {
60            Self::Public => 0,
61            Self::Normal => 1,
62            Self::Sensitive => 2,
63            Self::HighlySensitive => 3,
64            Self::Critical => 4,
65        };
66        self_level.cmp(&other_level)
67    }
68}
69
70/// Classification rule
71#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct ClassificationRule {
73    pub name: String,
74    pub pattern: String,
75    pub level: SensitivityLevel,
76    pub description: String,
77}
78
79/// A single match found during classification
80#[derive(Debug, Clone)]
81pub struct ClassificationMatch {
82    pub rule_name: String,
83    pub level: SensitivityLevel,
84    pub start: usize,
85    pub end: usize,
86    pub matched_text: String,
87}
88
89/// PII match (alias for ClassificationMatch for compatibility)
90pub type PiiMatch = ClassificationMatch;
91
92/// Classification result
93#[derive(Debug, Clone)]
94pub struct ClassificationResult {
95    pub overall_level: SensitivityLevel,
96    pub matches: Vec<ClassificationMatch>,
97    pub requires_tee: bool,
98}
99
100/// Redaction strategy
101#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
102pub enum RedactionStrategy {
103    Mask,
104    Remove,
105    Hash,
106}
107
108/// Regex-based classifier
109pub struct RegexClassifier {
110    rules: Vec<(String, Regex, SensitivityLevel)>,
111    default_level: SensitivityLevel,
112}
113
114impl RegexClassifier {
115    /// Create a new classifier with the given rules
116    pub fn new(
117        rules: &[ClassificationRule],
118        default_level: SensitivityLevel,
119    ) -> Result<Self, PrivacyError> {
120        let compiled_rules = rules
121            .iter()
122            .map(|rule| {
123                let regex = Regex::new(&rule.pattern)
124                    .map_err(|e| PrivacyError::InvalidPattern(format!("{}: {}", rule.name, e)))?;
125                Ok((rule.name.clone(), regex, rule.level))
126            })
127            .collect::<Result<Vec<_>, PrivacyError>>()?;
128
129        Ok(Self {
130            rules: compiled_rules,
131            default_level,
132        })
133    }
134
135    /// Classify text and return matches
136    pub fn classify(&self, text: &str) -> ClassificationResult {
137        let mut matches = Vec::new();
138        let mut overall_level = self.default_level;
139
140        for (rule_name, regex, level) in &self.rules {
141            for mat in regex.find_iter(text) {
142                matches.push(ClassificationMatch {
143                    rule_name: rule_name.clone(),
144                    level: *level,
145                    start: mat.start(),
146                    end: mat.end(),
147                    matched_text: mat.as_str().to_string(),
148                });
149                if *level > overall_level {
150                    overall_level = *level;
151                }
152            }
153        }
154
155        let requires_tee = overall_level >= SensitivityLevel::Sensitive;
156
157        ClassificationResult {
158            overall_level,
159            matches,
160            requires_tee,
161        }
162    }
163
164    /// Redact sensitive data in text
165    pub fn redact(&self, text: &str, strategy: RedactionStrategy) -> String {
166        let mut result = text.to_string();
167        let classification = self.classify(text);
168
169        // Sort matches by start position in reverse order to avoid offset issues
170        let mut matches = classification.matches;
171        matches.sort_by(|a, b| b.start.cmp(&a.start));
172
173        for mat in matches {
174            let redacted = redact_text(&mat.matched_text, &mat.rule_name, strategy);
175            result.replace_range(mat.start..mat.end, &redacted);
176        }
177
178        result
179    }
180
181    /// Check if text contains sensitive data
182    pub fn contains_sensitive(&self, text: &str) -> bool {
183        self.classify(text).overall_level >= SensitivityLevel::Sensitive
184    }
185
186    /// Get the highest sensitivity level in text
187    pub fn get_sensitivity_level(&self, text: &str) -> SensitivityLevel {
188        self.classify(text).overall_level
189    }
190}
191
192/// Redact text based on rule name and strategy
193pub fn redact_text(text: &str, rule_name: &str, strategy: RedactionStrategy) -> String {
194    match strategy {
195        RedactionStrategy::Mask => match rule_name {
196            "ssn" => "***-**-****".to_string(),
197            "email" => {
198                if let Some(at_pos) = text.find('@') {
199                    format!("****{}", &text[at_pos..])
200                } else {
201                    "[REDACTED]".to_string()
202                }
203            }
204            "credit_card" => {
205                let digits: String = text.chars().filter(|c| c.is_ascii_digit()).collect();
206                if digits.len() >= 4 {
207                    format!("****-****-****-{}", &digits[digits.len() - 4..])
208                } else {
209                    "****-****-****-****".to_string()
210                }
211            }
212            "phone" => "***-***-****".to_string(),
213            _ => "[REDACTED]".to_string(),
214        },
215        RedactionStrategy::Remove => String::new(),
216        RedactionStrategy::Hash => {
217            format!("[HASH:{}]", text.len())
218        }
219    }
220}
221
222/// Default classification rules
223pub fn default_classification_rules() -> Vec<ClassificationRule> {
224    vec![
225        ClassificationRule {
226            name: "credit_card".to_string(),
227            pattern: r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b".to_string(),
228            level: SensitivityLevel::HighlySensitive,
229            description: "Credit card number".to_string(),
230        },
231        ClassificationRule {
232            name: "ssn".to_string(),
233            pattern: r"\b\d{3}-\d{2}-\d{4}\b".to_string(),
234            level: SensitivityLevel::HighlySensitive,
235            description: "Social Security Number".to_string(),
236        },
237        ClassificationRule {
238            name: "email".to_string(),
239            pattern: r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b".to_string(),
240            level: SensitivityLevel::Sensitive,
241            description: "Email address".to_string(),
242        },
243        ClassificationRule {
244            name: "phone".to_string(),
245            pattern: r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b".to_string(),
246            level: SensitivityLevel::Sensitive,
247            description: "Phone number".to_string(),
248        },
249        ClassificationRule {
250            name: "api_key".to_string(),
251            pattern: r"\b[A-Za-z0-9_-]{32,}\b".to_string(),
252            level: SensitivityLevel::Critical,
253            description: "API key or token".to_string(),
254        },
255    ]
256}
257
/// Built-in list of dangerous command fragments (for command filtering).
///
/// The entries are returned as owned strings, in a fixed order.
pub fn default_dangerous_commands() -> Vec<String> {
    const COMMANDS: [&str; 4] = [
        "rm -rf",
        "dd if=",
        "mkfs",
        // Shell fork bomb.
        ":(){ :|:& };:",
    ];

    COMMANDS.iter().map(|command| (*command).to_owned()).collect()
}
267
268/// Keyword matcher configuration
269#[derive(Debug, Clone, Serialize, Deserialize)]
270pub struct KeywordMatcherConfig {
271    pub keywords: Vec<String>,
272    pub case_sensitive: bool,
273    pub sensitive_keywords: Vec<String>,
274    pub tee_threshold: SensitivityLevel,
275}
276
277impl Default for KeywordMatcherConfig {
278    fn default() -> Self {
279        Self {
280            keywords: Vec::new(),
281            case_sensitive: false,
282            sensitive_keywords: Vec::new(),
283            tee_threshold: SensitivityLevel::Sensitive,
284        }
285    }
286}
287
288/// Keyword matcher
289pub struct KeywordMatcher {
290    keywords: Vec<String>,
291    case_sensitive: bool,
292    sensitive_keywords: Vec<String>,
293    tee_threshold: SensitivityLevel,
294}
295
296impl KeywordMatcher {
297    /// Create a new keyword matcher
298    pub fn new(config: KeywordMatcherConfig) -> Self {
299        Self {
300            keywords: config.keywords,
301            case_sensitive: config.case_sensitive,
302            sensitive_keywords: config.sensitive_keywords,
303            tee_threshold: config.tee_threshold,
304        }
305    }
306
307    /// Create from keyword list (legacy)
308    pub fn from_keywords(keywords: Vec<String>) -> Self {
309        Self {
310            keywords,
311            case_sensitive: false,
312            sensitive_keywords: Vec::new(),
313            tee_threshold: SensitivityLevel::Sensitive,
314        }
315    }
316
317    /// Create from config
318    pub fn from_config(config: KeywordMatcherConfig) -> Self {
319        Self::new(config)
320    }
321
322    /// Check if text matches any keyword
323    pub fn matches(&self, text: &str) -> bool {
324        let text_to_check = if self.case_sensitive {
325            text.to_string()
326        } else {
327            text.to_lowercase()
328        };
329
330        let mut all_keywords = self.keywords.iter().chain(self.sensitive_keywords.iter());
331
332        all_keywords.any(|keyword| {
333            let keyword_to_check = if self.case_sensitive {
334                keyword.clone()
335            } else {
336                keyword.to_lowercase()
337            };
338            text_to_check.contains(&keyword_to_check)
339        })
340    }
341
342    /// Classify text based on keyword matches
343    pub fn classify(&self, text: &str) -> SensitivityLevel {
344        let text_to_check = if self.case_sensitive {
345            text.to_string()
346        } else {
347            text.to_lowercase()
348        };
349
350        // Check sensitive keywords first
351        for keyword in &self.sensitive_keywords {
352            let keyword_to_check = if self.case_sensitive {
353                keyword.clone()
354            } else {
355                keyword.to_lowercase()
356            };
357            if text_to_check.contains(&keyword_to_check) {
358                return self.tee_threshold;
359            }
360        }
361
362        // Check regular keywords (personal context, not high sensitivity)
363        if self.matches(text) {
364            SensitivityLevel::Normal
365        } else {
366            SensitivityLevel::Public
367        }
368    }
369}
370
#[cfg(test)]
mod tests {
    use super::*;

    /// Classifier built from the default rules with a `Normal` default level.
    fn default_classifier() -> RegexClassifier {
        let rules = default_classification_rules();
        RegexClassifier::new(&rules, SensitivityLevel::Normal).unwrap()
    }

    /// Keyword-matcher configuration with case-insensitive matching and a
    /// `HighlySensitive` threshold.
    fn keyword_config(keywords: &[&str], sensitive_keywords: &[&str]) -> KeywordMatcherConfig {
        KeywordMatcherConfig {
            keywords: keywords.iter().map(|k| k.to_string()).collect(),
            case_sensitive: false,
            sensitive_keywords: sensitive_keywords.iter().map(|k| k.to_string()).collect(),
            tee_threshold: SensitivityLevel::HighlySensitive,
        }
    }

    #[test]
    fn test_sensitivity_level_ordering() {
        // Each level must compare greater than the one directly below it.
        assert!(SensitivityLevel::Normal > SensitivityLevel::Public);
        assert!(SensitivityLevel::Sensitive > SensitivityLevel::Normal);
        assert!(SensitivityLevel::HighlySensitive > SensitivityLevel::Sensitive);
        assert!(SensitivityLevel::Critical > SensitivityLevel::HighlySensitive);
    }

    #[test]
    fn test_classifier_credit_card() {
        let outcome = default_classifier().classify("My card is 4111-1111-1111-1111");

        assert_eq!(outcome.overall_level, SensitivityLevel::HighlySensitive);
        assert!(outcome.requires_tee);
        assert_eq!(outcome.matches.len(), 1);
        assert_eq!(outcome.matches[0].rule_name, "credit_card");
    }

    #[test]
    fn test_classifier_email() {
        let outcome = default_classifier().classify("Contact: test@example.com");

        assert_eq!(outcome.overall_level, SensitivityLevel::Sensitive);
        assert!(outcome.requires_tee);
    }

    #[test]
    fn test_redact_ssn() {
        let masked = redact_text("123-45-6789", "ssn", RedactionStrategy::Mask);
        assert_eq!(masked, "***-**-****");
    }

    #[test]
    fn test_redact_credit_card() {
        let masked = redact_text("4111-1111-1111-1111", "credit_card", RedactionStrategy::Mask);
        assert_eq!(masked, "****-****-****-1111");
    }

    #[test]
    fn test_redact_email() {
        let masked = redact_text("test@example.com", "email", RedactionStrategy::Mask);
        assert_eq!(masked, "****@example.com");
    }

    #[test]
    fn test_keyword_matcher() {
        let matcher = KeywordMatcher::new(keyword_config(&["secret"], &["password"]));

        // Both regular and sensitive keywords count as a match.
        assert!(matcher.matches("This is a secret message"));
        assert!(matcher.matches("Enter your password"));
        assert!(!matcher.matches("This is a normal message"));
    }

    #[test]
    fn test_keyword_matcher_classify() {
        let matcher = KeywordMatcher::new(keyword_config(&[], &["confidential"]));

        assert_eq!(
            matcher.classify("This is confidential"),
            SensitivityLevel::HighlySensitive
        );
        assert_eq!(matcher.classify("This is public"), SensitivityLevel::Public);
    }
}