ai_proxy/
redaction.rs

1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use crate::{Error, Result};
4
5#[derive(Debug, Serialize)]
6struct RedactionRequest {
7    prompt: String,
8}
9
10#[derive(Debug, Deserialize)]
11struct RedactionResponse {
12    redacted_prompt: String,
13}
14
15pub struct RedactionEngine {
16    patterns: Vec<Regex>,
17}
18
19pub struct RedactionService {
20    api_url: Option<String>,
21    client: reqwest::Client,
22}
23
24impl RedactionEngine {
25    pub fn new() -> Self {
26        Self {
27            patterns: initialize_sensitive_patterns(),
28        }
29    }
30
31    pub fn redact_sensitive_content(&self, content: &str) -> String {
32        let mut redacted_content = content.to_string();
33        
34        for pattern in &self.patterns {
35            redacted_content = pattern.replace_all(&redacted_content, "REDACTED").to_string();
36        }
37        
38        redacted_content
39    }
40}
41
42impl Default for RedactionEngine {
43    fn default() -> Self {
44        Self::new()
45    }
46}
47
48fn initialize_sensitive_patterns() -> Vec<Regex> {
49    let patterns = vec![
50        // ==== OPENAI & AI PROVIDERS ====
51        r"sk-proj-[A-Za-z0-9_-]+",                // OpenAI project-scoped key
52        r"dtn_[A-Za-z0-9_]+",                     // Daytona API key
53        r"e2b_[A-Za-z0-9_]+",                     // E2B API key
54        r"sk-ant-[A-Za-z0-9_-]+",                 // Anthropic API key
55        r"sk-or-[A-Za-z0-9_-]+",                  // OpenAI org-scoped key
56        r"sk-[A-Za-z0-9_-]+",                     // OpenAI generic secret
57        r"gsk_[A-Za-z0-9_-]+",                    // Google Generative AI Studio key
58        r"xai-[A-Za-z0-9_-]+",                    // xAI key
59
60        // ==== GITHUB ====
61        r"ghp_[A-Za-z0-9_]{36}",                      // GitHub PAT (classic)
62        r"gho_[A-Za-z0-9_]{36}",                      // GitHub OAuth token
63        r"ghs_[A-Za-z0-9_]{36}",                      // GitHub App server-to-server
64        r"ghu_[A-Za-z0-9_]{36}",                      // GitHub App user-to-server
65        r"ghr_[A-Za-z0-9_]{36}",                      // GitHub refresh token
66        r"github_pat_[A-Za-z0-9_]{22}_[A-Za-z0-9_]{59}", // GitHub fine-grained PAT
67
68        // ==== GITLAB ====
69        r"glpat-[A-Za-z0-9_-]+",                  // GitLab PAT
70
71        // ==== AWS ====
72        r"AKIA[0-9A-Z]{16}",                          // AWS access key ID (long-lived)
73        r"ASIA[0-9A-Z]{16}",                          // AWS temporary access key ID
74        r"(?i)(aws_)?secret(access)?(_)?key\s*[:=]\s*[A-Za-z0-9/+=]{35,}", // AWS secret key
75
76        // ==== GOOGLE API KEYS ====
77        r"AIza[0-9A-Za-z-_]{35}",                     // Google API key (common AIza prefix)
78        r"ya29\.[0-9A-Za-z-_]+",                      // Google OAuth access token
79        r"GOCSPX-[A-Za-z0-9-_]+",                 // Google OAuth client secret
80
81        // ==== SLACK ====
82        r"xoxb-[A-Za-z0-9-]+-[A-Za-z0-9-]+-[A-Za-z0-9-]+", // Slack bot token
83        r"xoxp-[A-Za-z0-9-]+-[A-Za-z0-9-]+-[A-Za-z0-9-]+", // Slack user token
84        r"xoxa-[A-Za-z0-9-]+-[A-Za-z0-9-]+-[A-Za-z0-9-]+", // Slack workspace token
85        r"xapp-1-[A-Z0-9-]+-[0-9]+-[A-Za-z0-9]+",          // Slack App level token
86
87        // ==== STRIPE ====
88        r"sk_live_[0-9a-zA-Z]+",                  // Stripe live secret key
89        r"sk_test_[0-9a-zA-Z]+",                  // Stripe test secret key
90        r"rk_live_[0-9a-zA-Z]+",                  // Stripe restricted key live
91        r"rk_test_[0-9a-zA-Z]+",                  // Stripe restricted key test
92        r"whsec_[A-Za-z0-9]+",                    // Stripe webhook secret
93
94        // ==== TWILIO ====
95        r"AC[0-9a-fA-F]{32}",                         // Twilio Account SID
96        r"SK[0-9a-fA-F]{32}",                         // Twilio API Key SID
97
98        // ==== SENDGRID / MAIL PROVIDERS ====
99        r"SG\.[A-Za-z0-9_\-\.]{66}",                  // SendGrid API key
100        r"key-[0-9a-f]{32}",                          // Mailgun API key (specific with 'key-' prefix)
101        r"xkeysib-[A-Za-z0-9]{64}-[A-Za-z0-9]{16}",   // SendInBlue API key
102
103        // ==== PAYMENT / CREDIT CARDS ====
104        r"\b4[0-9]{12}(?:[0-9]{3})?\b",               // Visa (no spaces)
105        r"\b4[0-9]{3}[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}\b", // Visa (with spaces/dashes)
106        r"\b5[1-5][0-9]{14}\b",                       // MasterCard (no spaces)
107        r"\b5[1-5][0-9]{2}[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}\b", // MasterCard (with spaces/dashes)
108        r"\b3[47][0-9]{13}\b",                        // AmEx (no spaces)
109        r"\b3[47][0-9]{2}[\s-]?[0-9]{6}[\s-]?[0-9]{5}\b", // AmEx (with spaces/dashes)
110        r"\b6(?:011|5[0-9]{2})[0-9]{12}\b",           // Discover (no spaces)
111        r"\b6(?:011|5[0-9]{2})[\s-]?[0-9]{4}[\s-]?[0-9]{4}[\s-]?[0-9]{4}\b", // Discover (with spaces/dashes)
112
113        // ==== EMAIL ADDRESSES ====
114        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", // Email addresses
115
116        // ==== DATABASE CONNECTION STRINGS - With passwords ====
117        r"\bpostgres(?:ql)?://[A-Za-z0-9_%+.-]+:[^@\s]{1,}@[^\s]+/[A-Za-z0-9_.-]+",
118        r"\bmongodb(?:\+srv)?://[A-Za-z0-9_%+.-]+:[^@\s]{1,}@[^\s]+/[A-Za-z0-9_.-]+",
119        r"\bmysql://[A-Za-z0-9_%+.-]+:[^@\s]{1,}@[^\s]+/[A-Za-z0-9_.-]+",
120        r"\bredis://[A-Za-z0-9_%+.-]+:[^@\s]{1,}@[A-Za-z0-9_.:-]+",
121
122        // ==== JWT TOKENS ====
123        r"\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b", // JWT tokens
124
125        // ==== SSH KEYS ====
126        r"-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----[\s\S]+?-----END (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
127
128        // ==== OTHER SPECIFIC API KEYS ====
129        r"\bDD_API_KEY\b[^\n]{0,40}[:=]\s*[0-9a-f]{32}\b",         // Datadog
130        r"\bDD_APP_KEY\b[^\n]{0,40}[:=]\s*[0-9a-f]{40}\b",         // Datadog
131        r"NRAK-[A-Z0-9]{27}",                                        // New Relic ingest key
132        r"NRAL-[A-Z0-9]{27}",                                        // New Relic license key
133        r"shpat_[a-f0-9]{32}",                                       // Shopify private app access token
134        r"shpss_[a-f0-9]{32}",                                       // Shopify shared secret
135        r"npm_[A-Za-z0-9]{36}",                                      // npm access token
136        r"\bpypi-AgENdGV\w{20,}\b",                                 // PyPI token
137        r"EAA[A-Za-z0-9]+",                                      // Facebook access token
138        r"\bBearer\s+AAAAAAAA[A-Za-z0-9%\-_]+\b",                 // Twitter bearer token
139        r"lin_[A-Za-z0-9]{40}",                                      // Linear API key
140        r"secret_[A-Za-z0-9]{43}",                                   // Notion token
141        r"CFPAT-[A-Za-z0-9_-]+",                                // Contentful API token
142        r"sdk-[A-Za-z0-9_-]+",                                  // LaunchDarkly SDK key (made more specific)
143        r"mob-[A-Za-z0-9_-]+",                                  // LaunchDarkly Mobile key (made more specific)
144        r"r8_[A-Za-z0-9]+",                                      // Replicate API token
145
146        // ==== CRYPTO WALLETS ====
147        r"\b0x[a-fA-F0-9]{40}\b",                                    // Ethereum address
148        r"\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b",                     // Bitcoin (legacy)
149        r"\bbc1[qpzry9x8gf2tvdw0s3jn54khce6mua7l]{11,71}\b",       // Bitcoin Bech32
150
151        // ==== SPECIFIC SECRET PATTERNS WITH CONTEXT ====
152        r"(?i)\b(SECRET_KEY|API_SECRET|CLIENT_SECRET|PRIVATE_KEY)\b\s*[:=]\s*[A-Za-z0-9\-_.+/=]+",
153        r"(?i)\b(password|passwd|pwd)\b\s*[:=]\s*[^\s]+",
154        r"(?i)\b(api_key|apikey|api_token|access_token)\b\s*[:=]\s*[A-Za-z0-9\-_.]+",
155    ];
156
157    patterns.into_iter()
158        .filter_map(|pattern| Regex::new(pattern).ok())
159        .collect()
160}
161
162impl RedactionService {
163    pub fn new(api_url: Option<String>) -> Self {
164        Self {
165            api_url,
166            client: reqwest::Client::new(),
167        }
168    }
169
170    pub async fn redact_user_prompt(&self, prompt: &str) -> Result<String> {
171        if let Some(url) = &self.api_url {
172            let request = RedactionRequest {
173                prompt: prompt.to_string(),
174            };
175
176            let response = self
177                .client
178                .post(url)
179                .json(&request)
180                .timeout(std::time::Duration::from_secs(30))
181                .send()
182                .await
183                .map_err(|e| Error::Server(format!("Redaction API request failed: {}", e)))?;
184
185            if !response.status().is_success() {
186                return Err(Error::Server(format!(
187                    "Redaction API returned error status: {}",
188                    response.status()
189                )));
190            }
191
192            let redaction_response: RedactionResponse = response
193                .json()
194                .await
195                .map_err(|e| Error::Server(format!("Failed to parse redaction response: {}", e)))?;
196
197            Ok(redaction_response.redacted_prompt)
198        } else {
199            // No redaction URL provided, return original prompt
200            Ok(prompt.to_string())
201        }
202    }
203}
204
205impl Clone for RedactionService {
206    fn clone(&self) -> Self {
207        Self::new(self.api_url.clone())
208    }
209}