cloakpipe_core/detector/
patterns.rs1use crate::{DetectedEntity, EntityCategory, DetectionSource, config::DetectionConfig};
4use anyhow::Result;
5use regex::Regex;
6
7pub struct PatternDetector {
8 rules: Vec<PatternRule>,
9}
10
11struct PatternRule {
12 regex: Regex,
13 category: EntityCategory,
14 _name: String,
15}
16
17impl PatternDetector {
18 pub fn new(config: &DetectionConfig) -> Result<Self> {
19 let mut rules = Vec::new();
20
21 if config.secrets {
22 rules.push(PatternRule {
24 regex: Regex::new(r"(?i)(AKIA[0-9A-Z]{16})")?,
25 category: EntityCategory::Secret,
26 _name: "aws_access_key".into(),
27 });
28 rules.push(PatternRule {
30 regex: Regex::new(r"(?i)(sk-[a-zA-Z0-9]{32,}|ghp_[a-zA-Z0-9]{36}|gho_[a-zA-Z0-9]{36})")?,
31 category: EntityCategory::Secret,
32 _name: "api_token".into(),
33 });
34 rules.push(PatternRule {
36 regex: Regex::new(r"(?i)(postgres(?:ql)?://[^\s]+|mysql://[^\s]+|mongodb(?:\+srv)?://[^\s]+)")?,
37 category: EntityCategory::Secret,
38 _name: "connection_string".into(),
39 });
40 rules.push(PatternRule {
42 regex: Regex::new(r"eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+")?,
43 category: EntityCategory::Secret,
44 _name: "jwt_token".into(),
45 });
46 }
47
48 if config.emails {
49 rules.push(PatternRule {
50 regex: Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")?,
51 category: EntityCategory::Email,
52 _name: "email".into(),
53 });
54 }
55
56 if config.phone_numbers {
57 rules.push(PatternRule {
58 regex: Regex::new(r"\+?[1-9]\d{0,2}[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,9}")?,
59 category: EntityCategory::PhoneNumber,
60 _name: "phone".into(),
61 });
62 }
63
64 if config.ip_addresses {
65 rules.push(PatternRule {
66 regex: Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")?,
67 category: EntityCategory::IpAddress,
68 _name: "ipv4".into(),
69 });
70 }
71
72 if config.urls_internal {
73 rules.push(PatternRule {
74 regex: Regex::new(r"https?://(?:internal|staging|dev|admin)\.[a-zA-Z0-9.-]+(?:/[^\s]*)?")?,
75 category: EntityCategory::Url,
76 _name: "internal_url".into(),
77 });
78 }
79
80 Ok(Self { rules })
81 }
82
83 pub fn detect(&self, text: &str) -> Result<Vec<DetectedEntity>> {
84 let mut entities = Vec::new();
85 for rule in &self.rules {
86 for mat in rule.regex.find_iter(text) {
87 entities.push(DetectedEntity {
88 original: mat.as_str().to_string(),
89 start: mat.start(),
90 end: mat.end(),
91 category: rule.category.clone(),
92 confidence: 1.0,
93 source: DetectionSource::Pattern,
94 });
95 }
96 }
97 Ok(entities)
98 }
99}