1use regex::Regex;
8use std::sync::LazyLock;
9
/// Sensitivity level attached to a detected pattern.
///
/// Variants are declared from least to most sensitive, so the derived
/// ordering yields `Internal < Confidential < Restricted` and the highest
/// level in a set can be found with a plain `max`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DetectedSensitivity {
    /// Lowest level on this scale.
    Internal,
    /// Middle level on this scale.
    Confidential,
    /// Highest level on this scale.
    Restricted,
}
24
25#[derive(Debug, Clone)]
27pub struct DataFinding {
28 pub sensitivity: DetectedSensitivity,
30 pub pattern_name: &'static str,
32}
33
34static AWS_ACCESS_KEY: LazyLock<Regex> =
41 LazyLock::new(|| Regex::new(r"AKIA[0-9A-Z]{16}").expect("AWS access key regex is valid"));
42
43static PRIVATE_KEY: LazyLock<Regex> = LazyLock::new(|| {
44 Regex::new(r"-----BEGIN.*PRIVATE KEY-----").expect("private key regex is valid")
45});
46
47static BEARER_TOKEN_JSON: LazyLock<Regex> = LazyLock::new(|| {
48 Regex::new(r#"[Bb]earer\s+[a-zA-Z0-9._\-]{20,}"#).expect("bearer token regex is valid")
49});
50
51static GENERIC_API_KEY: LazyLock<Regex> =
52 LazyLock::new(|| Regex::new(r"sk-[a-zA-Z0-9]{20,}").expect("generic API key regex is valid"));
53
54static SSN: LazyLock<Regex> =
57 LazyLock::new(|| Regex::new(r"\b\d{3}-\d{2}-\d{4}\b").expect("SSN regex is valid"));
58
59static CREDIT_CARD: LazyLock<Regex> = LazyLock::new(|| {
60 Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b").expect("credit card regex is valid")
61});
62
63static EMAIL_ADDRESS: LazyLock<Regex> = LazyLock::new(|| {
64 Regex::new(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b")
65 .expect("email regex is valid")
66});
67
68static INTERNAL_IP: LazyLock<Regex> = LazyLock::new(|| {
71 Regex::new(r"\b(?:10\.\d+\.\d+\.\d+|172\.(?:1[6-9]|2\d|3[01])\.\d+\.\d+|192\.168\.\d+\.\d+)\b")
72 .expect("internal IP regex is valid")
73});
74
75static PATTERNS: LazyLock<Vec<(DetectedSensitivity, &'static str, &'static LazyLock<Regex>)>> =
77 LazyLock::new(|| {
78 vec![
79 (
81 DetectedSensitivity::Restricted,
82 "AWS access key",
83 &AWS_ACCESS_KEY,
84 ),
85 (DetectedSensitivity::Restricted, "private key", &PRIVATE_KEY),
86 (
87 DetectedSensitivity::Restricted,
88 "bearer token",
89 &BEARER_TOKEN_JSON,
90 ),
91 (
92 DetectedSensitivity::Restricted,
93 "generic API key (sk-)",
94 &GENERIC_API_KEY,
95 ),
96 (DetectedSensitivity::Confidential, "SSN", &SSN),
98 (
99 DetectedSensitivity::Confidential,
100 "credit card number",
101 &CREDIT_CARD,
102 ),
103 (
104 DetectedSensitivity::Confidential,
105 "email address",
106 &EMAIL_ADDRESS,
107 ),
108 (
110 DetectedSensitivity::Internal,
111 "internal IP address",
112 &INTERNAL_IP,
113 ),
114 ]
115 });
116
117pub fn scan_response(body: &str) -> Vec<DataFinding> {
123 let mut findings = Vec::new();
124 for (sensitivity, name, pattern) in PATTERNS.iter() {
125 if pattern.is_match(body) {
126 findings.push(DataFinding {
127 sensitivity: *sensitivity,
128 pattern_name: name,
129 });
130 }
131 }
132 findings
133}
134
135pub fn max_sensitivity(findings: &[DataFinding]) -> Option<DetectedSensitivity> {
137 findings.iter().map(|f| f.sensitivity).max()
138}
139
#[cfg(test)]
mod tests {
    use super::*;
    use arbiter_session::DataSensitivity;

    /// Returns `true` when `findings` contains an entry named `pattern_name`.
    fn has_finding(findings: &[DataFinding], pattern_name: &str) -> bool {
        findings.iter().any(|f| f.pattern_name == pattern_name)
    }

    /// Scans `body` and reports whether the pattern named `pattern_name` fired.
    fn scan_flags(body: &str, pattern_name: &str) -> bool {
        has_finding(&scan_response(body), pattern_name)
    }

    /// Maps a detected level onto the `DataSensitivity` variant of the same
    /// name so it can be compared against a session ceiling.
    fn to_data_sensitivity(level: DetectedSensitivity) -> DataSensitivity {
        match level {
            DetectedSensitivity::Internal => DataSensitivity::Internal,
            DetectedSensitivity::Confidential => DataSensitivity::Confidential,
            DetectedSensitivity::Restricted => DataSensitivity::Restricted,
        }
    }

    // --- Positive detection -------------------------------------------------

    #[test]
    fn detects_aws_access_key() {
        let body = r#"{"access_key": "AKIAIOSFODNN7EXAMPLE"}"#;
        let findings = scan_response(body);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].sensitivity, DetectedSensitivity::Restricted);
        assert_eq!(findings[0].pattern_name, "AWS access key");
    }

    #[test]
    fn detects_private_key() {
        let body = "here is -----BEGIN RSA PRIVATE KEY----- data";
        let findings = scan_response(body);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].sensitivity, DetectedSensitivity::Restricted);
        assert_eq!(findings[0].pattern_name, "private key");
    }

    #[test]
    fn detects_ec_private_key() {
        let body = "-----BEGIN EC PRIVATE KEY-----\nMHQCAQ...";
        assert!(
            scan_flags(body, "private key"),
            "should detect EC private keys"
        );
    }

    #[test]
    fn detects_bearer_token() {
        let body = r#"{"auth": "Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature"}"#;
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "bearer token"),
            "should detect bearer tokens in JSON, findings: {:?}",
            findings
        );
    }

    #[test]
    fn detects_generic_api_key() {
        let body = r#"{"key": "sk-abcdefghijklmnopqrstuvwx"}"#;
        assert!(
            scan_flags(body, "generic API key (sk-)"),
            "should detect sk- prefixed API keys"
        );
    }

    #[test]
    fn detects_ssn() {
        let body = r#"{"ssn": "123-45-6789"}"#;
        let findings = scan_response(body);
        let ssn = findings
            .iter()
            .find(|f| f.pattern_name == "SSN")
            .expect("should detect SSN patterns");
        assert_eq!(ssn.sensitivity, DetectedSensitivity::Confidential);
    }

    #[test]
    fn detects_credit_card() {
        let body = "card: 4111-1111-1111-1111";
        assert!(
            scan_flags(body, "credit card number"),
            "should detect credit card numbers"
        );
    }

    #[test]
    fn detects_credit_card_with_spaces() {
        let body = "card: 4111 1111 1111 1111";
        assert!(
            scan_flags(body, "credit card number"),
            "should detect credit card numbers with spaces"
        );
    }

    #[test]
    fn detects_credit_card_contiguous() {
        let body = "card: 4111111111111111";
        assert!(
            scan_flags(body, "credit card number"),
            "should detect contiguous credit card numbers"
        );
    }

    #[test]
    fn detects_email_address() {
        let body = r#"{"email": "user@example.com"}"#;
        assert!(
            scan_flags(body, "email address"),
            "should detect email addresses"
        );
    }

    #[test]
    fn detects_internal_ip_10() {
        let body = "server: 10.0.1.42";
        assert!(
            scan_flags(body, "internal IP address"),
            "should detect 10.x.x.x IPs"
        );
    }

    #[test]
    fn detects_internal_ip_172() {
        let body = "server: 172.16.0.1";
        assert!(
            scan_flags(body, "internal IP address"),
            "should detect 172.16-31.x.x IPs"
        );
    }

    #[test]
    fn detects_internal_ip_192_168() {
        let body = "server: 192.168.1.1";
        assert!(
            scan_flags(body, "internal IP address"),
            "should detect 192.168.x.x IPs"
        );
    }

    // --- Negative cases -----------------------------------------------------

    #[test]
    fn clean_response_has_no_findings() {
        let body = r#"{"status": "ok", "count": 42, "message": "hello world"}"#;
        let findings = scan_response(body);
        assert!(findings.is_empty(), "clean body should have no findings");
    }

    #[test]
    fn partial_aws_key_not_matched() {
        // Only five characters follow the `AKIA` prefix; sixteen are required.
        let body = "key: AKIA12345";
        assert!(
            !scan_flags(body, "AWS access key"),
            "partial AWS key (too short) should not match"
        );
    }

    #[test]
    fn short_sk_key_not_matched() {
        // Only ten characters follow `sk-`; twenty are required.
        let body = "key: sk-abc1234567";
        assert!(
            !scan_flags(body, "generic API key (sk-)"),
            "short sk- key should not match"
        );
    }

    #[test]
    fn public_ip_not_matched() {
        let body = "server: 8.8.8.8";
        assert!(
            !scan_flags(body, "internal IP address"),
            "public IP 8.8.8.8 should not match"
        );
    }

    #[test]
    fn non_rfc1918_172_not_matched() {
        // The private 172 block covers second octets 16 through 31 only.
        let body = "server: 172.32.0.1";
        assert!(
            !scan_flags(body, "internal IP address"),
            "172.32.x.x is not a private IP"
        );
    }

    #[test]
    fn short_bearer_not_matched() {
        // Only ten token characters; twenty are required.
        let body = r#""Bearer abc1234567""#;
        assert!(
            !scan_flags(body, "bearer token"),
            "short bearer token should not match"
        );
    }

    // --- Aggregation --------------------------------------------------------

    #[test]
    fn multiple_findings_in_one_body() {
        let body = r#"{
            "access_key": "AKIAIOSFODNN7EXAMPLE",
            "ssn": "123-45-6789",
            "email": "test@internal.corp",
            "server": "10.0.0.5"
        }"#;
        let findings = scan_response(body);

        assert!(
            has_finding(&findings, "AWS access key"),
            "should find AWS key"
        );
        assert!(has_finding(&findings, "SSN"), "should find SSN");
        assert!(
            has_finding(&findings, "email address"),
            "should find email"
        );
        assert!(
            has_finding(&findings, "internal IP address"),
            "should find internal IP"
        );
        assert!(findings.len() >= 4, "should find at least 4 patterns");
    }

    #[test]
    fn max_sensitivity_returns_highest() {
        let body = r#"{
            "ssn": "123-45-6789",
            "key": "AKIAIOSFODNN7EXAMPLE"
        }"#;
        let findings = scan_response(body);
        assert_eq!(
            max_sensitivity(&findings),
            Some(DetectedSensitivity::Restricted),
            "max should be Restricted when AWS key is present"
        );
    }

    #[test]
    fn max_sensitivity_empty_findings() {
        let findings: Vec<DataFinding> = vec![];
        assert_eq!(max_sensitivity(&findings), None);
    }

    #[test]
    fn max_sensitivity_internal_only() {
        let body = "server at 10.0.0.1";
        let findings = scan_response(body);
        assert_eq!(
            max_sensitivity(&findings),
            Some(DetectedSensitivity::Internal)
        );
    }

    #[test]
    fn detected_sensitivity_ordering() {
        assert!(DetectedSensitivity::Internal < DetectedSensitivity::Confidential);
        assert!(DetectedSensitivity::Confidential < DetectedSensitivity::Restricted);
        assert!(DetectedSensitivity::Internal < DetectedSensitivity::Restricted);
    }

    // --- Comparison against session ceilings --------------------------------

    #[test]
    fn ssn_exceeds_public_ceiling() {
        let ceiling = DataSensitivity::Public;
        let body = r#"{"customer_ssn": "123-45-6789"}"#;
        let findings = scan_response(body);
        assert!(!findings.is_empty(), "should detect SSN");

        let detected_as_data_sensitivity =
            to_data_sensitivity(max_sensitivity(&findings).unwrap());

        assert!(
            detected_as_data_sensitivity > ceiling,
            "Confidential SSN data ({:?}) should exceed Public ceiling ({:?})",
            detected_as_data_sensitivity,
            ceiling
        );
    }

    #[test]
    fn internal_data_within_internal_ceiling() {
        let ceiling = DataSensitivity::Internal;
        let body = "backend at 10.0.0.5";
        let findings = scan_response(body);

        let detected_as_data_sensitivity =
            to_data_sensitivity(max_sensitivity(&findings).unwrap());

        assert!(
            detected_as_data_sensitivity <= ceiling,
            "Internal data should be within Internal ceiling"
        );
    }

    #[test]
    fn restricted_data_blocked_for_confidential_ceiling() {
        let ceiling = DataSensitivity::Confidential;
        let body = "key: AKIAIOSFODNN7EXAMPLE";
        let findings = scan_response(body);

        let detected_as_data_sensitivity =
            to_data_sensitivity(max_sensitivity(&findings).unwrap());

        assert!(
            detected_as_data_sensitivity > ceiling,
            "Restricted data should exceed Confidential ceiling"
        );
    }
}