mockforge_intelligence/threat_modeling/
pii_detector.rs1use super::types::{ThreatCategory, ThreatFinding, ThreatLevel};
7use mockforge_openapi::OpenApiSpec;
8use regex::Regex;
9use std::collections::HashMap;
10
11pub struct PiiDetector {
13 pii_patterns: Vec<Regex>,
15 pii_field_names: Vec<String>,
17}
18
19impl PiiDetector {
20 pub fn new(pii_patterns: Vec<String>) -> Self {
22 let regex_patterns: Vec<Regex> = pii_patterns
23 .iter()
24 .filter_map(|p| Regex::new(&format!(r"(?i){}", p)).ok())
25 .collect();
26
27 let field_names = vec![
28 "email".to_string(),
29 "ssn".to_string(),
30 "social_security_number".to_string(),
31 "credit_card".to_string(),
32 "card_number".to_string(),
33 "password".to_string(),
34 "token".to_string(),
35 "secret".to_string(),
36 "api_key".to_string(),
37 "access_token".to_string(),
38 "refresh_token".to_string(),
39 "phone".to_string(),
40 "phone_number".to_string(),
41 "address".to_string(),
42 "date_of_birth".to_string(),
43 "dob".to_string(),
44 ];
45
46 Self {
47 pii_patterns: regex_patterns,
48 pii_field_names: field_names,
49 }
50 }
51
52 pub fn detect_pii(&self, spec: &OpenApiSpec) -> Vec<ThreatFinding> {
54 let mut findings = Vec::new();
55
56 for (path, path_item) in &spec.spec.paths.paths {
58 if let openapiv3::ReferenceOr::Item(path_item) = path_item {
59 let methods = vec![
61 ("GET", path_item.get.as_ref()),
62 ("POST", path_item.post.as_ref()),
63 ("PUT", path_item.put.as_ref()),
64 ("DELETE", path_item.delete.as_ref()),
65 ("PATCH", path_item.patch.as_ref()),
66 ("HEAD", path_item.head.as_ref()),
67 ("OPTIONS", path_item.options.as_ref()),
68 ("TRACE", path_item.trace.as_ref()),
69 ];
70
71 for (method, operation_opt) in methods {
72 let Some(operation) = operation_opt else {
73 continue;
74 };
75 if let Some(request_body) = &operation.request_body {
77 if let Some(ref_or_item) = request_body.as_item() {
78 for media_type in ref_or_item.content.values() {
79 if let Some(schema) = &media_type.schema {
80 findings.extend(self.analyze_schema(
81 schema,
82 &format!("{}.{}", method, path),
83 "request",
84 ));
85 }
86 }
87 }
88 }
89
90 for (status_code, response) in &operation.responses.responses {
92 if let openapiv3::ReferenceOr::Item(resp) = response {
93 for media_type in resp.content.values() {
94 if let Some(schema) = &media_type.schema {
95 findings.extend(self.analyze_schema(
96 schema,
97 &format!("{}.{}", method, path),
98 &format!("response.{}", status_code),
99 ));
100 }
101 }
102 }
103 }
104 }
105 }
106 }
107
108 findings
109 }
110
111 fn analyze_schema(
113 &self,
114 schema_ref: &openapiv3::ReferenceOr<openapiv3::Schema>,
115 base_path: &str,
116 _context: &str,
117 ) -> Vec<ThreatFinding> {
118 let mut findings = Vec::new();
119
120 if let openapiv3::ReferenceOr::Item(schema) = schema_ref {
121 if let Some(description) = &schema.schema_data.description {
123 if self.contains_pii_keywords(description) {
124 findings.push(ThreatFinding {
125 finding_type: ThreatCategory::PiiExposure,
126 severity: ThreatLevel::Medium,
127 description: format!(
128 "Schema description contains PII keywords: {}",
129 description
130 ),
131 field_path: Some(format!("{}.description", base_path)),
132 context: HashMap::new(),
133 confidence: 0.7,
134 });
135 }
136 }
137
138 if let openapiv3::SchemaKind::Type(openapiv3::Type::Object(obj_type)) =
140 &schema.schema_kind
141 {
142 for (prop_name, prop_schema) in &obj_type.properties {
143 let field_path = format!("{}.{}", base_path, prop_name);
144
145 if self.is_pii_field_name(prop_name) {
147 findings.push(ThreatFinding {
148 finding_type: ThreatCategory::PiiExposure,
149 severity: ThreatLevel::High,
150 description: format!(
151 "Field '{}' appears to contain PII based on name",
152 prop_name
153 ),
154 field_path: Some(field_path.clone()),
155 context: HashMap::new(),
156 confidence: 0.9,
157 });
158 }
159
160 if let openapiv3::ReferenceOr::Item(prop_schema_item) = prop_schema {
162 if let Some(prop_desc) = &prop_schema_item.as_ref().schema_data.description
163 {
164 if self.contains_pii_keywords(prop_desc) {
165 findings.push(ThreatFinding {
166 finding_type: ThreatCategory::PiiExposure,
167 severity: ThreatLevel::Medium,
168 description: format!(
169 "Field '{}' description contains PII keywords",
170 prop_name
171 ),
172 field_path: Some(field_path),
173 context: HashMap::new(),
174 confidence: 0.7,
175 });
176 }
177 }
178 }
179 }
180 }
181 }
182
183 findings
184 }
185
186 fn contains_pii_keywords(&self, text: &str) -> bool {
188 let text_lower = text.to_lowercase();
189 for pattern in &self.pii_patterns {
190 if pattern.is_match(&text_lower) {
191 return true;
192 }
193 }
194 false
195 }
196
197 fn is_pii_field_name(&self, field_name: &str) -> bool {
199 let field_lower = field_name.to_lowercase();
200 self.pii_field_names.iter().any(|pii_name| field_lower.contains(pii_name))
201 }
202}
203
204impl Default for PiiDetector {
205 fn default() -> Self {
206 Self::new(vec![
207 "email".to_string(),
208 "ssn".to_string(),
209 "credit.*card".to_string(),
210 "password".to_string(),
211 "token".to_string(),
212 "secret".to_string(),
213 ])
214 }
215}