Skip to main content

arbiter_credential/
response_classifier.rs

//! Response body data classification.
//!
//! Scans upstream response bodies for sensitive data patterns (PII, secrets,
//! internal infrastructure) and reports findings with sensitivity levels.
//! Used by the gateway to enforce data sensitivity ceilings per session.
6
7use regex::Regex;
8use std::sync::LazyLock;
9
/// Sensitivity level of detected data in a response body.
///
/// Variants are declared from least to most sensitive, so the derived
/// `Ord` can be used directly to pick the highest level. The ordering is
/// meant to match [`arbiter_session::DataSensitivity`] (minus `Public`,
/// which cannot be "detected" — it is the absence of sensitive content).
///
/// The per-variant descriptions below mirror the pattern table in this
/// module.
///
/// NOTE(review): earlier documentation listed emails under `Internal` and
/// mentioned phone numbers under `Confidential`, but the pattern table
/// registers the email pattern as `Confidential` and contains no phone
/// number pattern. Confirm which classification is the intended policy.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DetectedSensitivity {
    /// Internal (private-range) IP addresses — infrastructure metadata.
    Internal,
    /// PII: SSNs, credit card numbers, email addresses.
    Confidential,
    /// Secrets: private keys, AWS keys, bearer tokens, API keys.
    Restricted,
}
24
25/// A finding from response body scanning.
26#[derive(Debug, Clone)]
27pub struct DataFinding {
28    /// The sensitivity level of the detected pattern.
29    pub sensitivity: DetectedSensitivity,
30    /// Human-readable name of the pattern that matched.
31    pub pattern_name: &'static str,
32}
33
34// ---------------------------------------------------------------------------
35// Pattern definitions (LazyLock<Regex>)
36// ---------------------------------------------------------------------------
37
38// ── Restricted ─────────────────────────────────────────────────────────
39
40static AWS_ACCESS_KEY: LazyLock<Regex> =
41    LazyLock::new(|| Regex::new(r"AKIA[0-9A-Z]{16}").expect("AWS access key regex is valid"));
42
43static PRIVATE_KEY: LazyLock<Regex> = LazyLock::new(|| {
44    Regex::new(r"-----BEGIN.*PRIVATE KEY-----").expect("private key regex is valid")
45});
46
47static BEARER_TOKEN_JSON: LazyLock<Regex> = LazyLock::new(|| {
48    Regex::new(r#"[Bb]earer\s+[a-zA-Z0-9._\-]{20,}"#).expect("bearer token regex is valid")
49});
50
51static GENERIC_API_KEY: LazyLock<Regex> =
52    LazyLock::new(|| Regex::new(r"sk-[a-zA-Z0-9]{20,}").expect("generic API key regex is valid"));
53
54// ── Confidential ───────────────────────────────────────────────────────
55
56static SSN: LazyLock<Regex> =
57    LazyLock::new(|| Regex::new(r"\b\d{3}-\d{2}-\d{4}\b").expect("SSN regex is valid"));
58
59static CREDIT_CARD: LazyLock<Regex> = LazyLock::new(|| {
60    Regex::new(r"\b(?:\d{4}[-\s]?){3}\d{4}\b").expect("credit card regex is valid")
61});
62
63static EMAIL_ADDRESS: LazyLock<Regex> = LazyLock::new(|| {
64    Regex::new(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b")
65        .expect("email regex is valid")
66});
67
68// ── Internal ───────────────────────────────────────────────────────────
69
70static INTERNAL_IP: LazyLock<Regex> = LazyLock::new(|| {
71    Regex::new(r"\b(?:10\.\d+\.\d+\.\d+|172\.(?:1[6-9]|2\d|3[01])\.\d+\.\d+|192\.168\.\d+\.\d+)\b")
72        .expect("internal IP regex is valid")
73});
74
75/// All patterns paired with their sensitivity and name.
76static PATTERNS: LazyLock<Vec<(DetectedSensitivity, &'static str, &'static LazyLock<Regex>)>> =
77    LazyLock::new(|| {
78        vec![
79            // Restricted
80            (
81                DetectedSensitivity::Restricted,
82                "AWS access key",
83                &AWS_ACCESS_KEY,
84            ),
85            (DetectedSensitivity::Restricted, "private key", &PRIVATE_KEY),
86            (
87                DetectedSensitivity::Restricted,
88                "bearer token",
89                &BEARER_TOKEN_JSON,
90            ),
91            (
92                DetectedSensitivity::Restricted,
93                "generic API key (sk-)",
94                &GENERIC_API_KEY,
95            ),
96            // Confidential
97            (DetectedSensitivity::Confidential, "SSN", &SSN),
98            (
99                DetectedSensitivity::Confidential,
100                "credit card number",
101                &CREDIT_CARD,
102            ),
103            (
104                DetectedSensitivity::Confidential,
105                "email address",
106                &EMAIL_ADDRESS,
107            ),
108            // Internal
109            (
110                DetectedSensitivity::Internal,
111                "internal IP address",
112                &INTERNAL_IP,
113            ),
114        ]
115    });
116
117/// Scan a response body for sensitive data patterns.
118///
119/// Returns all findings. Callers should compare the highest finding sensitivity
120/// against the session's `data_sensitivity_ceiling` to decide whether to block
121/// or flag the response.
122pub fn scan_response(body: &str) -> Vec<DataFinding> {
123    let mut findings = Vec::new();
124    for (sensitivity, name, pattern) in PATTERNS.iter() {
125        if pattern.is_match(body) {
126            findings.push(DataFinding {
127                sensitivity: *sensitivity,
128                pattern_name: name,
129            });
130        }
131    }
132    findings
133}
134
135/// Return the highest (most sensitive) finding, if any.
136pub fn max_sensitivity(findings: &[DataFinding]) -> Option<DetectedSensitivity> {
137    findings.iter().map(|f| f.sensitivity).max()
138}
139
#[cfg(test)]
mod tests {
    use super::*;
    use arbiter_session::DataSensitivity;

    // ── Helpers ─────────────────────────────────────────────────────────

    /// Returns `true` when `findings` contains a hit for the named pattern.
    fn has_finding(findings: &[DataFinding], pattern_name: &str) -> bool {
        findings.iter().any(|f| f.pattern_name == pattern_name)
    }

    /// Maps a detected level onto the session-level scale so it can be
    /// compared against a ceiling. Kept in one place so the ceiling tests
    /// cannot drift apart.
    fn as_data_sensitivity(level: DetectedSensitivity) -> DataSensitivity {
        match level {
            DetectedSensitivity::Internal => DataSensitivity::Internal,
            DetectedSensitivity::Confidential => DataSensitivity::Confidential,
            DetectedSensitivity::Restricted => DataSensitivity::Restricted,
        }
    }

    // ── Individual pattern tests ────────────────────────────────────────

    #[test]
    fn detects_aws_access_key() {
        let body = r#"{"access_key": "AKIAIOSFODNN7EXAMPLE"}"#;
        let findings = scan_response(body);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].sensitivity, DetectedSensitivity::Restricted);
        assert_eq!(findings[0].pattern_name, "AWS access key");
    }

    #[test]
    fn detects_private_key() {
        let body = "here is -----BEGIN RSA PRIVATE KEY----- data";
        let findings = scan_response(body);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].sensitivity, DetectedSensitivity::Restricted);
        assert_eq!(findings[0].pattern_name, "private key");
    }

    #[test]
    fn detects_ec_private_key() {
        let body = "-----BEGIN EC PRIVATE KEY-----\nMHQCAQ...";
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "private key"),
            "should detect EC private keys"
        );
    }

    #[test]
    fn detects_bearer_token() {
        let body = r#"{"auth": "Bearer eyJhbGciOiJIUzI1NiJ9.payload.signature"}"#;
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "bearer token"),
            "should detect bearer tokens in JSON, findings: {:?}",
            findings
        );
    }

    #[test]
    fn detects_generic_api_key() {
        let body = r#"{"key": "sk-abcdefghijklmnopqrstuvwx"}"#;
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "generic API key (sk-)"),
            "should detect sk- prefixed API keys"
        );
    }

    #[test]
    fn detects_ssn() {
        let body = r#"{"ssn": "123-45-6789"}"#;
        let findings = scan_response(body);
        let ssn = findings
            .iter()
            .find(|f| f.pattern_name == "SSN")
            .expect("should detect SSN patterns");
        assert_eq!(ssn.sensitivity, DetectedSensitivity::Confidential);
    }

    #[test]
    fn detects_credit_card() {
        let body = "card: 4111-1111-1111-1111";
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "credit card number"),
            "should detect credit card numbers"
        );
    }

    #[test]
    fn detects_credit_card_with_spaces() {
        let body = "card: 4111 1111 1111 1111";
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "credit card number"),
            "should detect credit card numbers with spaces"
        );
    }

    #[test]
    fn detects_credit_card_contiguous() {
        let body = "card: 4111111111111111";
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "credit card number"),
            "should detect contiguous credit card numbers"
        );
    }

    #[test]
    fn detects_email_address() {
        let body = r#"{"email": "user@example.com"}"#;
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "email address"),
            "should detect email addresses"
        );
    }

    #[test]
    fn detects_internal_ip_10() {
        let body = "server: 10.0.1.42";
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "internal IP address"),
            "should detect 10.x.x.x IPs"
        );
    }

    #[test]
    fn detects_internal_ip_172() {
        let body = "server: 172.16.0.1";
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "internal IP address"),
            "should detect 172.16-31.x.x IPs"
        );
    }

    #[test]
    fn detects_internal_ip_192_168() {
        let body = "server: 192.168.1.1";
        let findings = scan_response(body);
        assert!(
            has_finding(&findings, "internal IP address"),
            "should detect 192.168.x.x IPs"
        );
    }

    // ── Negative tests ─────────────────────────────────────────────────

    #[test]
    fn clean_response_has_no_findings() {
        let body = r#"{"status": "ok", "count": 42, "message": "hello world"}"#;
        let findings = scan_response(body);
        assert!(findings.is_empty(), "clean body should have no findings");
    }

    #[test]
    fn partial_aws_key_not_matched() {
        // AKIA + only 5 chars (need 16)
        let body = "key: AKIA12345";
        let findings = scan_response(body);
        assert!(
            !has_finding(&findings, "AWS access key"),
            "partial AWS key (too short) should not match"
        );
    }

    #[test]
    fn short_sk_key_not_matched() {
        // sk- followed by only 10 chars (need 20+)
        let body = "key: sk-abc1234567";
        let findings = scan_response(body);
        assert!(
            !has_finding(&findings, "generic API key (sk-)"),
            "short sk- key should not match"
        );
    }

    #[test]
    fn public_ip_not_matched() {
        let body = "server: 8.8.8.8";
        let findings = scan_response(body);
        assert!(
            !has_finding(&findings, "internal IP address"),
            "public IP 8.8.8.8 should not match"
        );
    }

    #[test]
    fn non_rfc1918_172_not_matched() {
        // 172.32.x.x is outside the private range (172.16-31)
        let body = "server: 172.32.0.1";
        let findings = scan_response(body);
        assert!(
            !has_finding(&findings, "internal IP address"),
            "172.32.x.x is not a private IP"
        );
    }

    #[test]
    fn short_bearer_not_matched() {
        // Bearer token with only 10 chars (need 20+)
        let body = r#""Bearer abc1234567""#;
        let findings = scan_response(body);
        assert!(
            !has_finding(&findings, "bearer token"),
            "short bearer token should not match"
        );
    }

    // ── Composite tests ────────────────────────────────────────────────

    #[test]
    fn multiple_findings_in_one_body() {
        let body = r#"{
            "access_key": "AKIAIOSFODNN7EXAMPLE",
            "ssn": "123-45-6789",
            "email": "test@internal.corp",
            "server": "10.0.0.5"
        }"#;
        let findings = scan_response(body);

        assert!(
            has_finding(&findings, "AWS access key"),
            "should find AWS key"
        );
        assert!(has_finding(&findings, "SSN"), "should find SSN");
        assert!(
            has_finding(&findings, "email address"),
            "should find email"
        );
        assert!(
            has_finding(&findings, "internal IP address"),
            "should find internal IP"
        );
        assert!(findings.len() >= 4, "should find at least 4 patterns");
    }

    #[test]
    fn max_sensitivity_returns_highest() {
        let body = r#"{
            "ssn": "123-45-6789",
            "key": "AKIAIOSFODNN7EXAMPLE"
        }"#;
        let findings = scan_response(body);
        assert_eq!(
            max_sensitivity(&findings),
            Some(DetectedSensitivity::Restricted),
            "max should be Restricted when AWS key is present"
        );
    }

    #[test]
    fn max_sensitivity_empty_findings() {
        let findings: Vec<DataFinding> = vec![];
        assert_eq!(max_sensitivity(&findings), None);
    }

    #[test]
    fn max_sensitivity_internal_only() {
        let body = "server at 10.0.0.1";
        let findings = scan_response(body);
        assert_eq!(
            max_sensitivity(&findings),
            Some(DetectedSensitivity::Internal)
        );
    }

    // ── Ordering test ──────────────────────────────────────────────────

    #[test]
    fn detected_sensitivity_ordering() {
        assert!(DetectedSensitivity::Internal < DetectedSensitivity::Confidential);
        assert!(DetectedSensitivity::Confidential < DetectedSensitivity::Restricted);
        assert!(DetectedSensitivity::Internal < DetectedSensitivity::Restricted);
    }

    // ── Integration-level tests: detected level vs. session ceiling ────

    #[test]
    fn ssn_exceeds_public_ceiling() {
        let ceiling = DataSensitivity::Public;
        let body = r#"{"customer_ssn": "123-45-6789"}"#;
        let findings = scan_response(body);
        assert!(!findings.is_empty(), "should detect SSN");

        let highest = max_sensitivity(&findings).expect("findings are non-empty");
        let detected_as_data_sensitivity = as_data_sensitivity(highest);

        assert!(
            detected_as_data_sensitivity > ceiling,
            "Confidential SSN data ({:?}) should exceed Public ceiling ({:?})",
            detected_as_data_sensitivity,
            ceiling
        );
    }

    #[test]
    fn internal_data_within_internal_ceiling() {
        let ceiling = DataSensitivity::Internal;
        let body = "backend at 10.0.0.5";
        let findings = scan_response(body);

        let highest = max_sensitivity(&findings).expect("internal IP should be detected");
        let detected_as_data_sensitivity = as_data_sensitivity(highest);

        assert!(
            detected_as_data_sensitivity <= ceiling,
            "Internal data should be within Internal ceiling"
        );
    }

    #[test]
    fn restricted_data_blocked_for_confidential_ceiling() {
        let ceiling = DataSensitivity::Confidential;
        let body = "key: AKIAIOSFODNN7EXAMPLE";
        let findings = scan_response(body);

        let highest = max_sensitivity(&findings).expect("AWS key should be detected");
        let detected_as_data_sensitivity = as_data_sensitivity(highest);

        assert!(
            detected_as_data_sensitivity > ceiling,
            "Restricted data should exceed Confidential ceiling"
        );
    }
}