1use std::sync::Arc;
6use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
7use regex::Regex;
8use ahash::AHashMap;
9use log::debug;
10
11use super::{TurboConfig, SecurityError};
12use crate::analyzer::security::{SecuritySeverity, SecurityCategory};
13
14#[derive(Debug, Clone)]
16pub struct CompiledPattern {
17 pub id: String,
18 pub name: String,
19 pub severity: SecuritySeverity,
20 pub category: SecurityCategory,
21 pub description: String,
22 pub remediation: Vec<String>,
23 pub references: Vec<String>,
24 pub cwe_id: Option<String>,
25 pub confidence_boost_keywords: Vec<String>,
26 pub false_positive_keywords: Vec<String>,
27}
28
29#[derive(Debug, Clone)]
31pub struct PatternMatch {
32 pub pattern: Arc<CompiledPattern>,
33 pub line_number: usize,
34 pub column_number: usize,
35 pub evidence: String,
36 pub confidence: f32,
37}
38
39pub struct PatternEngine {
41 secret_matcher: AhoCorasick,
43 env_var_matcher: AhoCorasick,
44 api_key_matcher: AhoCorasick,
45
46 secret_patterns: AHashMap<usize, Arc<CompiledPattern>>,
48 env_var_patterns: AHashMap<usize, Arc<CompiledPattern>>,
49 api_key_patterns: AHashMap<usize, Arc<CompiledPattern>>,
50
51 complex_patterns: Vec<(Regex, Arc<CompiledPattern>)>,
53
54 total_patterns: usize,
56}
57
58impl PatternEngine {
59 pub fn new(config: &TurboConfig) -> Result<Self, SecurityError> {
60 debug!("Initializing pattern engine with pattern sets: {:?}", config.pattern_sets);
61
62 let (secret_patterns, env_var_patterns, api_key_patterns, complex_patterns) =
64 Self::load_patterns(&config.pattern_sets)?;
65
66 let secret_matcher = Self::build_matcher(&secret_patterns)?;
68 let env_var_matcher = Self::build_matcher(&env_var_patterns)?;
69 let api_key_matcher = Self::build_matcher(&api_key_patterns)?;
70
71 let total_patterns = secret_patterns.len() + env_var_patterns.len() +
72 api_key_patterns.len() + complex_patterns.len();
73
74 debug!("Pattern engine initialized with {} total patterns", total_patterns);
75
76 Ok(Self {
77 secret_matcher,
78 env_var_matcher,
79 api_key_matcher,
80 secret_patterns: Self::create_pattern_map(secret_patterns),
81 env_var_patterns: Self::create_pattern_map(env_var_patterns),
82 api_key_patterns: Self::create_pattern_map(api_key_patterns),
83 complex_patterns,
84 total_patterns,
85 })
86 }
87
88 pub fn pattern_count(&self) -> usize {
90 self.total_patterns
91 }
92
93 pub fn scan_content(&self, content: &str, quick_reject: bool) -> Vec<PatternMatch> {
95 if quick_reject && !self.quick_contains_secrets(content) {
97 return Vec::new();
98 }
99
100 let mut matches = Vec::new();
101
102 let lines: Vec<&str> = content.lines().collect();
104 let mut line_offsets = vec![0];
105 let mut offset = 0;
106
107 for line in &lines {
108 offset += line.len() + 1; line_offsets.push(offset);
110 }
111
112 matches.extend(self.run_matcher(&self.secret_matcher, content, &self.secret_patterns, &lines, &line_offsets));
114 matches.extend(self.run_matcher(&self.env_var_matcher, content, &self.env_var_patterns, &lines, &line_offsets));
115 matches.extend(self.run_matcher(&self.api_key_matcher, content, &self.api_key_patterns, &lines, &line_offsets));
116
117 for (line_num, line) in lines.iter().enumerate() {
119 for (regex, pattern) in &self.complex_patterns {
120 if let Some(mat) = regex.find(line) {
121 let confidence = self.calculate_confidence(line, content, &pattern);
122
123 matches.push(PatternMatch {
124 pattern: Arc::clone(pattern),
125 line_number: line_num + 1,
126 column_number: mat.start() + 1,
127 evidence: self.extract_evidence(line, mat.start(), mat.end()),
128 confidence,
129 });
130 }
131 }
132 }
133
134 matches.retain(|m| {
136 let threshold = match m.pattern.id.as_str() {
137 id if id.contains("aws-access-key") || id.contains("openai-api-key") => 0.3, id if id.contains("jwt-token") || id.contains("database-url") => 0.5, id if id.contains("generic") => 0.7, _ => 0.6, };
142 m.confidence > threshold
143 });
144
145 matches
146 }
147
148 fn quick_contains_secrets(&self, content: &str) -> bool {
150 const QUICK_PATTERNS: &[&str] = &[
152 "api", "key", "secret", "token", "password", "credential",
153 "auth", "private", "-----BEGIN", "sk_", "pk_", "eyJ",
154 ];
155
156 let content_lower = content.to_lowercase();
157 QUICK_PATTERNS.iter().any(|&pattern| content_lower.contains(pattern))
158 }
159
160 fn run_matcher(
162 &self,
163 matcher: &AhoCorasick,
164 content: &str,
165 patterns: &AHashMap<usize, Arc<CompiledPattern>>,
166 lines: &[&str],
167 line_offsets: &[usize],
168 ) -> Vec<PatternMatch> {
169 let mut matches = Vec::new();
170
171 for mat in matcher.find_iter(content) {
172 let pattern_id = mat.pattern().as_usize();
173 if let Some(pattern) = patterns.get(&pattern_id) {
174 let (line_num, col_num) = self.offset_to_line_col(mat.start(), line_offsets);
176 let line = lines.get(line_num.saturating_sub(1)).unwrap_or(&"");
177
178 let confidence = self.calculate_confidence(line, content, pattern);
179
180 matches.push(PatternMatch {
181 pattern: Arc::clone(pattern),
182 line_number: line_num,
183 column_number: col_num,
184 evidence: self.extract_evidence(line, mat.start(), mat.end()),
185 confidence,
186 });
187 }
188 }
189
190 matches
191 }
192
193 fn offset_to_line_col(&self, offset: usize, line_offsets: &[usize]) -> (usize, usize) {
195 let line_num = line_offsets.binary_search(&offset)
196 .unwrap_or_else(|i| i.saturating_sub(1));
197
198 let line_start = line_offsets.get(line_num).copied().unwrap_or(0);
199 let col_num = offset - line_start + 1;
200
201 (line_num + 1, col_num)
202 }
203
204 fn calculate_confidence(&self, line: &str, content: &str, pattern: &CompiledPattern) -> f32 {
206 let mut confidence: f32 = 0.6;
207
208 let line_lower = line.to_lowercase();
209 let content_lower = content.to_lowercase();
210
211 if line_lower.starts_with("//") || line_lower.starts_with("#") || line_lower.contains("example") ||
213 line_lower.contains("placeholder") || line_lower.contains("your_") || line_lower.contains("todo") {
214 return 0.0; }
216
217 if line.contains("=") || line.contains(":") {
219 confidence += 0.2;
220 }
221
222 for keyword in &pattern.confidence_boost_keywords {
224 if content_lower.contains(&keyword.to_lowercase()) {
225 confidence += 0.1;
226 }
227 }
228
229 for keyword in &pattern.false_positive_keywords {
230 if line_lower.contains(&keyword.to_lowercase()) {
231 confidence -= 0.4;
232 }
233 }
234
235 confidence.clamp(0.0, 1.0)
236 }
237
238
239
240 fn extract_evidence(&self, line: &str, start: usize, end: usize) -> String {
242 let prefix = &line[..start.min(line.len())];
244 let suffix = &line[end.min(line.len())..];
245 let masked = "*".repeat((end - start).min(20));
246
247 format!("{}{}{}", prefix, masked, suffix).trim().to_string()
248 }
249
250 fn build_matcher(patterns: &[(String, Arc<CompiledPattern>)]) -> Result<AhoCorasick, SecurityError> {
252 let strings: Vec<&str> = patterns.iter().map(|(s, _)| s.as_str()).collect();
253
254 let matcher = AhoCorasickBuilder::new()
255 .match_kind(MatchKind::LeftmostFirst)
256 .ascii_case_insensitive(true)
257 .build(&strings)
258 .map_err(|e| SecurityError::PatternEngine(format!("Failed to build matcher: {}", e)))?;
259
260 Ok(matcher)
261 }
262
263 fn create_pattern_map(patterns: Vec<(String, Arc<CompiledPattern>)>) -> AHashMap<usize, Arc<CompiledPattern>> {
265 patterns.into_iter()
266 .enumerate()
267 .map(|(id, (_, pattern))| (id, pattern))
268 .collect()
269 }
270
271 fn load_patterns(pattern_sets: &[String]) -> Result<(
273 Vec<(String, Arc<CompiledPattern>)>,
274 Vec<(String, Arc<CompiledPattern>)>,
275 Vec<(String, Arc<CompiledPattern>)>,
276 Vec<(Regex, Arc<CompiledPattern>)>,
277 ), SecurityError> {
278 let mut secret_patterns = Vec::new();
279 let mut env_var_patterns = Vec::new();
280 let mut api_key_patterns = Vec::new();
281 let mut complex_patterns = Vec::new();
282
283 if pattern_sets.contains(&"default".to_string()) {
285 Self::load_default_patterns(&mut secret_patterns, &mut env_var_patterns,
286 &mut api_key_patterns, &mut complex_patterns)?;
287 }
288
289 for set in pattern_sets {
291 match set.as_str() {
292 "aws" => Self::load_aws_patterns(&mut api_key_patterns)?,
293 "gcp" => Self::load_gcp_patterns(&mut api_key_patterns)?,
294 "azure" => Self::load_azure_patterns(&mut api_key_patterns)?,
295 "crypto" => Self::load_crypto_patterns(&mut secret_patterns)?,
296 _ => {}
297 }
298 }
299
300 Ok((secret_patterns, env_var_patterns, api_key_patterns, complex_patterns))
301 }
302
303 fn load_default_patterns(
305 secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
306 env_var_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
307 api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>,
308 complex_patterns: &mut Vec<(Regex, Arc<CompiledPattern>)>,
309 ) -> Result<(), SecurityError> {
310 api_key_patterns.push((
314 "sk-".to_string(),
315 Arc::new(CompiledPattern {
316 id: "openai-api-key".to_string(),
317 name: "OpenAI API Key".to_string(),
318 severity: SecuritySeverity::Critical,
319 category: SecurityCategory::SecretsExposure,
320 description: "OpenAI API key detected".to_string(),
321 remediation: vec![
322 "Remove API key from source code".to_string(),
323 "Use environment variables".to_string(),
324 ],
325 references: vec!["https://platform.openai.com/docs/api-reference".to_string()],
326 cwe_id: Some("CWE-798".to_string()),
327 confidence_boost_keywords: vec!["openai".to_string(), "gpt".to_string()],
328 false_positive_keywords: vec![
329 "sk-xxxxxxxx".to_string(), "sk-...".to_string(), "sk_test".to_string(),
330 "example".to_string(), "placeholder".to_string(), "your_".to_string(),
331 "TODO".to_string(), "FIXME".to_string(), "XXX".to_string(),
332 ],
333 }),
334 ));
335
336 complex_patterns.push((
338 Regex::new(r#"(?i)(?:api[_-]?key|secret[_-]?key|access[_-]?token)\s*[:=]\s*['"]([a-zA-Z0-9+/=]{32,})['"]"#)
340 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
341 Arc::new(CompiledPattern {
342 id: "long-secret-value".to_string(),
343 name: "Hardcoded Secret Value".to_string(),
344 severity: SecuritySeverity::Critical,
345 category: SecurityCategory::SecretsExposure,
346 description: "Long secret value hardcoded in source code".to_string(),
347 remediation: vec![
348 "Use environment variables for secrets".to_string(),
349 "Implement proper secret management".to_string(),
350 ],
351 references: vec![],
352 cwe_id: Some("CWE-798".to_string()),
353 confidence_boost_keywords: vec!["bearer".to_string(), "auth".to_string()],
354 false_positive_keywords: vec![
355 "process.env".to_string(), "getenv".to_string(), "example".to_string(),
356 "placeholder".to_string(), "your_".to_string(), "TODO".to_string(),
357 "test".to_string(), "demo".to_string(), "fake".to_string(),
358 ],
359 }),
360 ));
361
362 complex_patterns.push((
364 Regex::new(r#"\beyJ[a-zA-Z0-9+/=]{100,}\b"#)
365 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
366 Arc::new(CompiledPattern {
367 id: "jwt-token".to_string(),
368 name: "JWT Token".to_string(),
369 severity: SecuritySeverity::High,
370 category: SecurityCategory::SecretsExposure,
371 description: "JWT token detected in source code".to_string(),
372 remediation: vec![
373 "Never hardcode JWT tokens".to_string(),
374 "Use secure token storage".to_string(),
375 ],
376 references: vec![],
377 cwe_id: Some("CWE-798".to_string()),
378 confidence_boost_keywords: vec!["bearer".to_string(), "authorization".to_string()],
379 false_positive_keywords: vec!["example".to_string(), "demo".to_string()],
380 }),
381 ));
382
383 complex_patterns.push((
385 Regex::new(r#"(?i)(?:postgres|mysql|mongodb)://[^:\s]+:[^@\s]+@[^/\s]+/[^\s]*"#)
386 .map_err(|e| SecurityError::PatternEngine(format!("Regex error: {}", e)))?,
387 Arc::new(CompiledPattern {
388 id: "database-url-with-creds".to_string(),
389 name: "Database URL with Credentials".to_string(),
390 severity: SecuritySeverity::Critical,
391 category: SecurityCategory::SecretsExposure,
392 description: "Database connection string with embedded credentials".to_string(),
393 remediation: vec![
394 "Use environment variables for database credentials".to_string(),
395 "Use connection string without embedded passwords".to_string(),
396 ],
397 references: vec![],
398 cwe_id: Some("CWE-798".to_string()),
399 confidence_boost_keywords: vec!["connection".to_string(), "database".to_string()],
400 false_positive_keywords: vec![
401 "example.com".to_string(), "localhost".to_string(), "placeholder".to_string(),
402 "your_".to_string(), "user:pass".to_string(),
403 ],
404 }),
405 ));
406
407 secret_patterns.push((
409 "-----BEGIN".to_string(),
410 Arc::new(CompiledPattern {
411 id: "private-key-header".to_string(),
412 name: "Private Key".to_string(),
413 severity: SecuritySeverity::Critical,
414 category: SecurityCategory::SecretsExposure,
415 description: "Private key detected".to_string(),
416 remediation: vec![
417 "Never commit private keys to version control".to_string(),
418 "Use secure key storage solutions".to_string(),
419 ],
420 references: vec![],
421 cwe_id: Some("CWE-321".to_string()),
422 confidence_boost_keywords: vec!["PRIVATE".to_string(), "RSA".to_string(), "DSA".to_string()],
423 false_positive_keywords: vec!["PUBLIC".to_string(), "CERTIFICATE".to_string()],
424 }),
425 ));
426
427 Ok(())
428 }
429
430 fn load_aws_patterns(api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
432 api_key_patterns.push((
433 "AKIA".to_string(),
434 Arc::new(CompiledPattern {
435 id: "aws-access-key".to_string(),
436 name: "AWS Access Key".to_string(),
437 severity: SecuritySeverity::Critical,
438 category: SecurityCategory::SecretsExposure,
439 description: "AWS Access Key ID detected".to_string(),
440 remediation: vec![
441 "Remove AWS credentials from source code".to_string(),
442 "Use IAM roles or environment variables".to_string(),
443 "Rotate the exposed key immediately".to_string(),
444 ],
445 references: vec!["https://docs.aws.amazon.com/security/".to_string()],
446 cwe_id: Some("CWE-798".to_string()),
447 confidence_boost_keywords: vec!["aws".to_string(), "s3".to_string(), "ec2".to_string()],
448 false_positive_keywords: vec!["AKIA00000000".to_string()],
449 }),
450 ));
451
452 Ok(())
453 }
454
455 fn load_gcp_patterns(api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
457 api_key_patterns.push((
458 "AIza".to_string(),
459 Arc::new(CompiledPattern {
460 id: "gcp-api-key".to_string(),
461 name: "Google Cloud API Key".to_string(),
462 severity: SecuritySeverity::High,
463 category: SecurityCategory::SecretsExposure,
464 description: "Google Cloud API key detected".to_string(),
465 remediation: vec![
466 "Use service accounts instead of API keys".to_string(),
467 "Restrict API key usage by IP/referrer".to_string(),
468 ],
469 references: vec!["https://cloud.google.com/security/".to_string()],
470 cwe_id: Some("CWE-798".to_string()),
471 confidence_boost_keywords: vec!["google".to_string(), "gcp".to_string(), "firebase".to_string()],
472 false_positive_keywords: vec![],
473 }),
474 ));
475
476 Ok(())
477 }
478
479 fn load_azure_patterns(_api_key_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
481 Ok(())
483 }
484
485 fn load_crypto_patterns(secret_patterns: &mut Vec<(String, Arc<CompiledPattern>)>) -> Result<(), SecurityError> {
487 secret_patterns.push((
488 "-----BEGIN".to_string(),
489 Arc::new(CompiledPattern {
490 id: "private-key".to_string(),
491 name: "Private Key".to_string(),
492 severity: SecuritySeverity::Critical,
493 category: SecurityCategory::SecretsExposure,
494 description: "Private key detected".to_string(),
495 remediation: vec![
496 "Never commit private keys to version control".to_string(),
497 "Use secure key storage solutions".to_string(),
498 ],
499 references: vec![],
500 cwe_id: Some("CWE-321".to_string()),
501 confidence_boost_keywords: vec!["RSA".to_string(), "PRIVATE".to_string()],
502 false_positive_keywords: vec!["PUBLIC".to_string()],
503 }),
504 ));
505
506 Ok(())
507 }
508}
509
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration must yield an engine with at least one rule.
    #[test]
    fn test_pattern_engine_creation() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config);
        assert!(engine.is_ok());

        let engine = engine.unwrap();
        assert!(engine.pattern_count() > 0);
    }

    /// A hard-coded `sk-` key must be reported as an OpenAI API key.
    #[test]
    fn test_pattern_matching() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config).unwrap();

        let content = r#"
            const apiKey = "sk-1234567890abcdef";
            password = "super_secret_password";
            process.env.DATABASE_URL
        "#;

        let matches = engine.scan_content(content, false);
        assert!(!matches.is_empty());

        assert!(matches.iter().any(|m| m.pattern.id == "openai-api-key"));
        // No loader in this module registers a rule with id "generic-password",
        // so a finding with that id can never be produced; the former assertion
        // on it could not pass and has been dropped.
    }

    /// Content without any secret-related keyword is rejected by the prefilter.
    #[test]
    fn test_quick_reject() {
        let config = TurboConfig::default();
        let engine = PatternEngine::new(&config).unwrap();

        let safe_content = "fn main() { println!(\"Hello, world!\"); }";
        let matches = engine.scan_content(safe_content, true);
        assert!(matches.is_empty());
    }
}