Skip to main content

cloakrs_patterns/
api_key.rs

1use crate::common::{compile_regex, confidence, context_boost, is_boundary};
2use cloakrs_core::{Confidence, EntityType, Locale, PiiEntity, Recognizer, Span};
3use once_cell::sync::Lazy;
4use regex::Regex;
5
6static AWS_ACCESS_KEY_REGEX: Lazy<Regex> = Lazy::new(|| compile_regex(r"\bAKIA[A-Z0-9]{16}\b"));
7static JWT_REGEX: Lazy<Regex> =
8    Lazy::new(|| compile_regex(r"\b[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\b"));
9static GENERIC_API_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
10    compile_regex(
11        r#"(?i)\b(?:api[_-]?key|access[_-]?token|token|secret|authorization)\b\s*(?::|=|=>)\s*(?:bearer\s+)?["']?([A-Za-z0-9][A-Za-z0-9_\-+/=]{19,})["']?"#,
12    )
13});
14
15const SECRET_CONTEXT_WORDS: &[&str] = &[
16    "api_key",
17    "api key",
18    "access_token",
19    "token",
20    "secret",
21    "authorization",
22    "bearer",
23    "credential",
24];
25
26/// Recognizes AWS access key identifiers with the `AKIA` prefix.
27///
28/// # Examples
29///
30/// ```
31/// use cloakrs_core::{EntityType, Recognizer};
32/// use cloakrs_patterns::AwsAccessKeyRecognizer;
33///
34/// let findings = AwsAccessKeyRecognizer.scan("aws AKIAIOSFODNN7EXAMPLE");
35/// assert_eq!(findings[0].entity_type, EntityType::AwsAccessKey);
36/// ```
37#[derive(Debug, Clone, Copy, Default)]
38pub struct AwsAccessKeyRecognizer;
39
40impl Recognizer for AwsAccessKeyRecognizer {
41    fn id(&self) -> &str {
42        "aws_access_key_v1"
43    }
44
45    fn entity_type(&self) -> EntityType {
46        EntityType::AwsAccessKey
47    }
48
49    fn supported_locales(&self) -> &[Locale] {
50        &[]
51    }
52
53    fn scan(&self, text: &str) -> Vec<PiiEntity> {
54        AWS_ACCESS_KEY_REGEX
55            .find_iter(text)
56            .filter(|matched| self.is_valid_match(text, matched.start(), matched.end()))
57            .map(|matched| PiiEntity {
58                entity_type: self.entity_type(),
59                span: Span::new(matched.start(), matched.end()),
60                text: matched.as_str().to_string(),
61                confidence: self.compute_confidence(text, matched.start()),
62                recognizer_id: self.id().to_string(),
63            })
64            .collect()
65    }
66
67    fn validate(&self, candidate: &str) -> bool {
68        candidate.len() == 20
69            && candidate.starts_with("AKIA")
70            && candidate
71                .chars()
72                .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit())
73    }
74}
75
76impl AwsAccessKeyRecognizer {
77    fn is_valid_match(&self, text: &str, start: usize, end: usize) -> bool {
78        self.validate(&text[start..end]) && is_boundary(text, start, end)
79    }
80
81    fn compute_confidence(&self, text: &str, start: usize) -> Confidence {
82        confidence(0.99 + context_boost(text, start, SECRET_CONTEXT_WORDS))
83    }
84}
85
86/// Recognizes JSON Web Tokens in `header.payload.signature` form.
87///
88/// # Examples
89///
90/// ```
91/// use cloakrs_core::{EntityType, Recognizer};
92/// use cloakrs_patterns::JwtRecognizer;
93///
94/// let token = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123456789_xyz";
95/// let findings = JwtRecognizer.scan(token);
96/// assert_eq!(findings[0].entity_type, EntityType::Jwt);
97/// ```
98#[derive(Debug, Clone, Copy, Default)]
99pub struct JwtRecognizer;
100
101impl Recognizer for JwtRecognizer {
102    fn id(&self) -> &str {
103        "jwt_regex_v1"
104    }
105
106    fn entity_type(&self) -> EntityType {
107        EntityType::Jwt
108    }
109
110    fn supported_locales(&self) -> &[Locale] {
111        &[]
112    }
113
114    fn scan(&self, text: &str) -> Vec<PiiEntity> {
115        JWT_REGEX
116            .find_iter(text)
117            .filter(|matched| self.is_valid_match(text, matched.start(), matched.end()))
118            .map(|matched| PiiEntity {
119                entity_type: self.entity_type(),
120                span: Span::new(matched.start(), matched.end()),
121                text: matched.as_str().to_string(),
122                confidence: self.compute_confidence(text, matched.start()),
123                recognizer_id: self.id().to_string(),
124            })
125            .collect()
126    }
127
128    fn validate(&self, candidate: &str) -> bool {
129        let mut parts = candidate.split('.');
130        let Some(header) = parts.next() else {
131            return false;
132        };
133        let Some(payload) = parts.next() else {
134            return false;
135        };
136        let Some(signature) = parts.next() else {
137            return false;
138        };
139        parts.next().is_none()
140            && header.starts_with("ey")
141            && payload.starts_with("ey")
142            && validate_jwt_segment(header, 8)
143            && validate_jwt_segment(payload, 8)
144            && validate_jwt_segment(signature, 8)
145    }
146}
147
148impl JwtRecognizer {
149    fn is_valid_match(&self, text: &str, start: usize, end: usize) -> bool {
150        self.validate(&text[start..end]) && is_secret_boundary(text, start, end)
151    }
152
153    fn compute_confidence(&self, text: &str, start: usize) -> Confidence {
154        confidence(0.92 + context_boost(text, start, SECRET_CONTEXT_WORDS))
155    }
156}
157
158/// Recognizes generic API keys and bearer-style tokens with nearby key context.
159///
160/// # Examples
161///
162/// ```
163/// use cloakrs_core::{EntityType, Recognizer};
164/// use cloakrs_patterns::ApiKeyRecognizer;
165///
166/// let findings = ApiKeyRecognizer.scan("api_key = sk_live_0123456789abcdef");
167/// assert_eq!(findings[0].entity_type, EntityType::ApiKey);
168/// ```
169#[derive(Debug, Clone, Copy, Default)]
170pub struct ApiKeyRecognizer;
171
172impl Recognizer for ApiKeyRecognizer {
173    fn id(&self) -> &str {
174        "api_key_context_v1"
175    }
176
177    fn entity_type(&self) -> EntityType {
178        EntityType::ApiKey
179    }
180
181    fn supported_locales(&self) -> &[Locale] {
182        &[]
183    }
184
185    fn scan(&self, text: &str) -> Vec<PiiEntity> {
186        GENERIC_API_KEY_REGEX
187            .captures_iter(text)
188            .filter_map(|captures| captures.get(1))
189            .filter(|matched| self.is_valid_match(text, matched.start(), matched.end()))
190            .map(|matched| PiiEntity {
191                entity_type: self.entity_type(),
192                span: Span::new(matched.start(), matched.end()),
193                text: matched.as_str().to_string(),
194                confidence: self.compute_confidence(text, matched.start(), matched.as_str()),
195                recognizer_id: self.id().to_string(),
196            })
197            .collect()
198    }
199
200    fn validate(&self, candidate: &str) -> bool {
201        validate_generic_secret(candidate)
202    }
203}
204
205impl ApiKeyRecognizer {
206    fn is_valid_match(&self, text: &str, start: usize, end: usize) -> bool {
207        self.validate(&text[start..end]) && is_secret_boundary(text, start, end)
208    }
209
210    fn compute_confidence(&self, text: &str, start: usize, candidate: &str) -> Confidence {
211        let base = if looks_structured_secret(candidate) {
212            0.85
213        } else {
214            0.75
215        };
216        confidence(base + context_boost(text, start, SECRET_CONTEXT_WORDS))
217    }
218}
219
/// Returns `true` when `segment` is at least `min_len` bytes long and consists
/// solely of ASCII letters, ASCII digits, `_` or `-`.
fn validate_jwt_segment(segment: &str, min_len: usize) -> bool {
    if segment.len() < min_len {
        return false;
    }
    // Any byte of a multi-byte character is >= 0x80 and fails this check, so
    // scanning bytes rejects non-ASCII input just like scanning chars would.
    segment
        .bytes()
        .all(|byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'-')
}
226
/// Decides whether `candidate` is plausible as a generic secret value.
///
/// Surrounding quotes, commas and semicolons are stripped first. What remains
/// must be at least 20 bytes long, must not be one character repeated, and may
/// only contain ASCII letters, ASCII digits, `_`, `-`, `+`, `/` or `=`.
fn validate_generic_secret(candidate: &str) -> bool {
    let core = candidate.trim_matches(&['"', '\'', ',', ';'][..]);
    if core.len() < 20 {
        return false;
    }

    // A run of a single repeated character (e.g. "aaaa…") is treated as filler.
    let is_single_repeated_char = core
        .chars()
        .next()
        .map_or(true, |first| core.chars().all(|c| c == first));
    if is_single_repeated_char {
        return false;
    }

    core.chars()
        .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '+' | '/' | '='))
}
235
/// Returns `true` when `candidate` mixes all three character kinds: at least
/// one ASCII letter, one ASCII digit and one of `_`, `-`, `+`, `/`, `=`.
fn looks_structured_secret(candidate: &str) -> bool {
    let (mut saw_letter, mut saw_digit, mut saw_symbol) = (false, false, false);
    for c in candidate.chars() {
        match c {
            'a'..='z' | 'A'..='Z' => saw_letter = true,
            '0'..='9' => saw_digit = true,
            '_' | '-' | '+' | '/' | '=' => saw_symbol = true,
            _ => {}
        }
    }
    saw_letter && saw_digit && saw_symbol
}
244
245fn is_secret_boundary(text: &str, start: usize, end: usize) -> bool {
246    let before = text[..start].chars().next_back();
247    let after = text[end..].chars().next();
248    !before.is_some_and(is_secret_prefix_char) && !after.is_some_and(is_secret_suffix_char)
249}
250
/// Returns `true` for characters that, when found immediately before a match,
/// indicate the match is the tail of a longer token: ASCII letters and digits
/// plus `_`, `-`, `+`, `/` and `.`.
fn is_secret_prefix_char(c: char) -> bool {
    match c {
        '_' | '-' | '+' | '/' | '.' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
254
/// Returns `true` for characters that, when found immediately after a match,
/// indicate the match is the head of a longer token: ASCII letters and digits
/// plus `_`, `-`, `+`, `/`, `=` and `.`.
fn is_secret_suffix_char(c: char) -> bool {
    match c {
        '_' | '-' | '+' | '/' | '=' | '.' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
258
#[cfg(test)]
mod tests {
    use super::*;
    use crate::default_registry;

    /// Well-formed sample token shared by the JWT tests.
    const SAMPLE_JWT: &str = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123456789_xyz";

    /// Scans `input` with `recognizer` and returns only the matched text of
    /// each finding, in scan order.
    fn texts(recognizer: impl Recognizer, input: &str) -> Vec<String> {
        let mut matched = Vec::new();
        for finding in recognizer.scan(input) {
            matched.push(finding.text);
        }
        matched
    }

    // --- AWS access keys ---------------------------------------------------

    #[test]
    fn test_aws_access_key_valid_detected() {
        let found = texts(AwsAccessKeyRecognizer, "aws AKIAIOSFODNN7EXAMPLE");
        assert_eq!(found, ["AKIAIOSFODNN7EXAMPLE"]);
    }

    #[test]
    fn test_aws_access_key_token_context_detected() {
        let found = texts(AwsAccessKeyRecognizer, "access token AKIA1234567890ABCDEF");
        assert_eq!(found, ["AKIA1234567890ABCDEF"]);
    }

    #[test]
    fn test_aws_access_key_lowercase_rejected() {
        let found = texts(AwsAccessKeyRecognizer, "akiaiosfodnn7example");
        assert!(found.is_empty());
    }

    #[test]
    fn test_aws_access_key_too_short_rejected() {
        let found = texts(AwsAccessKeyRecognizer, "AKIAIOSFODNN7EXAMP");
        assert!(found.is_empty());
    }

    #[test]
    fn test_aws_access_key_embedded_in_word_rejected() {
        let found = texts(AwsAccessKeyRecognizer, "idAKIAIOSFODNN7EXAMPLE");
        assert!(found.is_empty());
    }

    #[test]
    fn test_aws_access_key_context_boosts_confidence() {
        let recognizer = AwsAccessKeyRecognizer;
        let with_context = recognizer.scan("secret AKIAIOSFODNN7EXAMPLE");
        let without_context = recognizer.scan("value AKIAIOSFODNN7EXAMPLE");
        // `>=` rather than `>`: the comparison tolerates equal scores.
        assert!(with_context[0].confidence >= without_context[0].confidence);
    }

    // --- JWTs ---------------------------------------------------------------

    #[test]
    fn test_jwt_valid_detected() {
        assert_eq!(texts(JwtRecognizer, SAMPLE_JWT), [SAMPLE_JWT]);
    }

    #[test]
    fn test_jwt_with_bearer_context_detected() {
        let token = "eyJ0eXAiOiJKV1QifQ.eyJyb2xlIjoiYWRtaW4ifQ.signature_123456";
        let input = format!("Bearer {token}");
        assert_eq!(texts(JwtRecognizer, &input), [token]);
    }

    #[test]
    fn test_jwt_short_version_like_value_rejected() {
        let found = texts(JwtRecognizer, "version 1.2.3");
        assert!(found.is_empty());
    }

    #[test]
    fn test_jwt_two_segments_rejected() {
        let found = texts(JwtRecognizer, "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM");
        assert!(found.is_empty());
    }

    #[test]
    fn test_jwt_embedded_in_larger_secret_rejected() {
        // A leading `x` makes the header start with `xe` instead of `ey`.
        let input = format!("x{SAMPLE_JWT}");
        assert!(texts(JwtRecognizer, &input).is_empty());
    }

    #[test]
    fn test_jwt_context_boosts_confidence() {
        let recognizer = JwtRecognizer;
        let with_context = recognizer.scan(&format!("authorization: bearer {SAMPLE_JWT}"));
        let without_context = recognizer.scan(SAMPLE_JWT);
        assert!(with_context[0].confidence > without_context[0].confidence);
    }

    // --- Generic API keys ---------------------------------------------------

    #[test]
    fn test_api_key_after_api_key_label_detected() {
        let found = texts(ApiKeyRecognizer, "api_key = sk_live_0123456789abcdef");
        assert_eq!(found, ["sk_live_0123456789abcdef"]);
    }

    #[test]
    fn test_api_key_after_token_label_detected() {
        let found = texts(ApiKeyRecognizer, "token: abcdef1234567890ABCDEF12");
        assert_eq!(found, ["abcdef1234567890ABCDEF12"]);
    }

    #[test]
    fn test_api_key_after_authorization_bearer_detected() {
        let found = texts(
            ApiKeyRecognizer,
            "Authorization: Bearer abcdef1234567890ABCDEF12",
        );
        assert_eq!(found, ["abcdef1234567890ABCDEF12"]);
    }

    #[test]
    fn test_api_key_after_secret_label_detected() {
        let found = texts(ApiKeyRecognizer, "secret=ZXhhbXBsZS1zZWNyZXQtdmFsdWU=");
        assert_eq!(found, ["ZXhhbXBsZS1zZWNyZXQtdmFsdWU="]);
    }

    #[test]
    fn test_api_key_without_context_rejected() {
        let found = texts(ApiKeyRecognizer, "value abcdef1234567890ABCDEF12");
        assert!(found.is_empty());
    }

    #[test]
    fn test_api_key_short_value_rejected() {
        let found = texts(ApiKeyRecognizer, "api_key=abc123");
        assert!(found.is_empty());
    }

    #[test]
    fn test_api_key_repeated_value_rejected() {
        let found = texts(ApiKeyRecognizer, "api_key=aaaaaaaaaaaaaaaaaaaa");
        assert!(found.is_empty());
    }

    #[test]
    fn test_api_key_context_boosts_confidence() {
        // Both inputs carry a label; the first value mixes letters, digits and
        // symbols, the second has no symbol.
        let recognizer = ApiKeyRecognizer;
        let structured = recognizer.scan("api_key=sk_live_0123456789abcdef");
        let plain = recognizer.scan("token=abcdef1234567890ABCDEF12");
        assert!(structured[0].confidence > plain[0].confidence);
    }

    // --- Registry integration -----------------------------------------------

    #[test]
    fn test_secret_default_registry_detects_all_secret_types() {
        // One secret of each kind per line; `\` continuations keep this a
        // single `&'static str` literal.
        let input = "aws AKIAIOSFODNN7EXAMPLE\n\
                     jwt eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123456789_xyz\n\
                     api_key=sk_live_0123456789abcdef\n";
        let findings = default_registry().scan_all(input);

        for expected in [
            EntityType::AwsAccessKey,
            EntityType::Jwt,
            EntityType::ApiKey,
        ] {
            assert!(findings
                .iter()
                .any(|finding| finding.entity_type == expected));
        }
    }
}
434}