Skip to main content

mockforge_intelligence/threat_modeling/
pii_detector.rs

1//! PII (Personally Identifiable Information) detection
2//!
3//! This module detects potential PII exposure in API contracts
4//! by analyzing field names, descriptions, and schema patterns.
5
6use super::types::{ThreatCategory, ThreatFinding, ThreatLevel};
7use mockforge_openapi::OpenApiSpec;
8use regex::Regex;
9use std::collections::HashMap;
10
11/// PII detector for API contracts
12pub struct PiiDetector {
13    /// PII field name patterns
14    pii_patterns: Vec<Regex>,
15    /// Common PII field names
16    pii_field_names: Vec<String>,
17}
18
19impl PiiDetector {
20    /// Create a new PII detector
21    pub fn new(pii_patterns: Vec<String>) -> Self {
22        let regex_patterns: Vec<Regex> = pii_patterns
23            .iter()
24            .filter_map(|p| Regex::new(&format!(r"(?i){}", p)).ok())
25            .collect();
26
27        let field_names = vec![
28            "email".to_string(),
29            "ssn".to_string(),
30            "social_security_number".to_string(),
31            "credit_card".to_string(),
32            "card_number".to_string(),
33            "password".to_string(),
34            "token".to_string(),
35            "secret".to_string(),
36            "api_key".to_string(),
37            "access_token".to_string(),
38            "refresh_token".to_string(),
39            "phone".to_string(),
40            "phone_number".to_string(),
41            "address".to_string(),
42            "date_of_birth".to_string(),
43            "dob".to_string(),
44        ];
45
46        Self {
47            pii_patterns: regex_patterns,
48            pii_field_names: field_names,
49        }
50    }
51
52    /// Detect PII in an OpenAPI spec
53    pub fn detect_pii(&self, spec: &OpenApiSpec) -> Vec<ThreatFinding> {
54        let mut findings = Vec::new();
55
56        // Analyze all paths and schemas
57        for (path, path_item) in &spec.spec.paths.paths {
58            if let openapiv3::ReferenceOr::Item(path_item) = path_item {
59                // Iterate over all HTTP methods
60                let methods = vec![
61                    ("GET", path_item.get.as_ref()),
62                    ("POST", path_item.post.as_ref()),
63                    ("PUT", path_item.put.as_ref()),
64                    ("DELETE", path_item.delete.as_ref()),
65                    ("PATCH", path_item.patch.as_ref()),
66                    ("HEAD", path_item.head.as_ref()),
67                    ("OPTIONS", path_item.options.as_ref()),
68                    ("TRACE", path_item.trace.as_ref()),
69                ];
70
71                for (method, operation_opt) in methods {
72                    let Some(operation) = operation_opt else {
73                        continue;
74                    };
75                    // Analyze request body
76                    if let Some(request_body) = &operation.request_body {
77                        if let Some(ref_or_item) = request_body.as_item() {
78                            for media_type in ref_or_item.content.values() {
79                                if let Some(schema) = &media_type.schema {
80                                    findings.extend(self.analyze_schema(
81                                        schema,
82                                        &format!("{}.{}", method, path),
83                                        "request",
84                                    ));
85                                }
86                            }
87                        }
88                    }
89
90                    // Analyze responses
91                    for (status_code, response) in &operation.responses.responses {
92                        if let openapiv3::ReferenceOr::Item(resp) = response {
93                            for media_type in resp.content.values() {
94                                if let Some(schema) = &media_type.schema {
95                                    findings.extend(self.analyze_schema(
96                                        schema,
97                                        &format!("{}.{}", method, path),
98                                        &format!("response.{}", status_code),
99                                    ));
100                                }
101                            }
102                        }
103                    }
104                }
105            }
106        }
107
108        findings
109    }
110
111    /// Analyze a schema for PII
112    fn analyze_schema(
113        &self,
114        schema_ref: &openapiv3::ReferenceOr<openapiv3::Schema>,
115        base_path: &str,
116        _context: &str,
117    ) -> Vec<ThreatFinding> {
118        let mut findings = Vec::new();
119
120        if let openapiv3::ReferenceOr::Item(schema) = schema_ref {
121            // Check schema description
122            if let Some(description) = &schema.schema_data.description {
123                if self.contains_pii_keywords(description) {
124                    findings.push(ThreatFinding {
125                        finding_type: ThreatCategory::PiiExposure,
126                        severity: ThreatLevel::Medium,
127                        description: format!(
128                            "Schema description contains PII keywords: {}",
129                            description
130                        ),
131                        field_path: Some(format!("{}.description", base_path)),
132                        context: HashMap::new(),
133                        confidence: 0.7,
134                    });
135                }
136            }
137
138            // Check properties
139            if let openapiv3::SchemaKind::Type(openapiv3::Type::Object(obj_type)) =
140                &schema.schema_kind
141            {
142                for (prop_name, prop_schema) in &obj_type.properties {
143                    let field_path = format!("{}.{}", base_path, prop_name);
144
145                    // Check field name
146                    if self.is_pii_field_name(prop_name) {
147                        findings.push(ThreatFinding {
148                            finding_type: ThreatCategory::PiiExposure,
149                            severity: ThreatLevel::High,
150                            description: format!(
151                                "Field '{}' appears to contain PII based on name",
152                                prop_name
153                            ),
154                            field_path: Some(field_path.clone()),
155                            context: HashMap::new(),
156                            confidence: 0.9,
157                        });
158                    }
159
160                    // Recursively check nested schemas
161                    if let openapiv3::ReferenceOr::Item(prop_schema_item) = prop_schema {
162                        if let Some(prop_desc) = &prop_schema_item.as_ref().schema_data.description
163                        {
164                            if self.contains_pii_keywords(prop_desc) {
165                                findings.push(ThreatFinding {
166                                    finding_type: ThreatCategory::PiiExposure,
167                                    severity: ThreatLevel::Medium,
168                                    description: format!(
169                                        "Field '{}' description contains PII keywords",
170                                        prop_name
171                                    ),
172                                    field_path: Some(field_path),
173                                    context: HashMap::new(),
174                                    confidence: 0.7,
175                                });
176                            }
177                        }
178                    }
179                }
180            }
181        }
182
183        findings
184    }
185
186    /// Check if a string contains PII keywords
187    fn contains_pii_keywords(&self, text: &str) -> bool {
188        let text_lower = text.to_lowercase();
189        for pattern in &self.pii_patterns {
190            if pattern.is_match(&text_lower) {
191                return true;
192            }
193        }
194        false
195    }
196
197    /// Check if a field name indicates PII
198    fn is_pii_field_name(&self, field_name: &str) -> bool {
199        let field_lower = field_name.to_lowercase();
200        self.pii_field_names.iter().any(|pii_name| field_lower.contains(pii_name))
201    }
202}
203
204impl Default for PiiDetector {
205    fn default() -> Self {
206        Self::new(vec![
207            "email".to_string(),
208            "ssn".to_string(),
209            "credit.*card".to_string(),
210            "password".to_string(),
211            "token".to_string(),
212            "secret".to_string(),
213        ])
214    }
215}