1use std::collections::HashMap;
15use std::path::{Path, PathBuf};
16use std::fs;
17use regex::Regex;
18use log::{debug, info, warn};
19
20use super::{SecurityError, SecurityFinding, SecuritySeverity, SecurityCategory, SecurityReport, SecurityAnalysisConfig, GitIgnoreAnalyzer, GitIgnoreRisk};
21
/// Security analyzer for Python projects.
///
/// Holds the analysis configuration together with the compiled pattern
/// tables that are matched, line by line, against Python source files.
pub struct PythonSecurityAnalyzer {
    /// Analysis configuration handed to `with_config`.
    config: SecurityAnalysisConfig,
    /// General Python secret patterns; applied to every analyzed file.
    python_patterns: Vec<PythonSecretPattern>,
    /// Framework-specific patterns, looked up by the framework name that
    /// `detect_python_framework` returns for a file.
    framework_patterns: HashMap<String, Vec<FrameworkPattern>>,
    /// AI/ML service credential patterns; applied to every analyzed file.
    ai_ml_patterns: Vec<AiMlPattern>,
    /// Cloud provider credential patterns; applied to every analyzed file.
    cloud_patterns: Vec<CloudPattern>,
    /// Database connection-string patterns; applied to every analyzed file.
    database_patterns: Vec<DatabasePattern>,
    /// Patterns describing environment-variable access in Python code.
    env_var_patterns: Vec<EnvVarPattern>,
    /// Optional gitignore analyzer; `with_config` initializes it to `None`.
    gitignore_analyzer: Option<GitIgnoreAnalyzer>,
}
33
/// A general-purpose secret pattern for Python source code.
#[derive(Debug, Clone)]
pub struct PythonSecretPattern {
    /// Stable identifier; used as the prefix of the generated finding id.
    pub id: String,
    /// Human-readable name; used to build the finding title.
    pub name: String,
    /// Regex matched against each line. When it has a capture group, group 1
    /// is treated as the secret and masked before being reported.
    pub pattern: Regex,
    /// Base severity, adjusted by the context score before it is reported.
    pub severity: SecuritySeverity,
    /// Description copied into the finding.
    pub description: String,
    /// Terms passed to the context-confidence scoring of the file content.
    pub context_indicators: Vec<String>,
    /// Substrings (compared case-insensitively) that, when present on the
    /// matched line, cause the match to be skipped.
    pub false_positive_indicators: Vec<String>,
    /// Remediation advice copied into the finding.
    pub remediation_hints: Vec<String>,
}
46
/// A secret pattern that only applies to files of a detected web framework.
#[derive(Debug, Clone)]
pub struct FrameworkPattern {
    /// Framework display name; used in the finding id (lower-cased) and title.
    pub framework: String,
    /// Regex matched against each line; capture group 1, when present, is
    /// masked and reported as evidence.
    pub pattern: Regex,
    /// Severity reported for a match.
    pub severity: SecuritySeverity,
    /// Description copied into the finding.
    pub description: String,
    /// File extensions associated with this pattern (e.g. `py`).
    pub file_extensions: Vec<String>,
}
56
/// A credential pattern for an AI/ML service.
#[derive(Debug, Clone)]
pub struct AiMlPattern {
    /// Service display name; used in the finding id, title and remediation.
    pub service: String,
    /// Regex matched against each line; capture group 1, when present, is
    /// masked and reported as evidence.
    pub pattern: Regex,
    /// Severity reported for a match.
    pub severity: SecuritySeverity,
    /// Description copied into the finding.
    pub description: String,
    /// Human-readable key format, appended to the finding description.
    pub api_key_format: String,
}
66
/// A credential pattern for a cloud or SaaS provider.
#[derive(Debug, Clone)]
pub struct CloudPattern {
    /// Provider display name; used in the finding id, title and remediation.
    pub provider: String,
    /// Service or credential kind; used in the finding id and title.
    pub service: String,
    /// Regex matched against each line; capture group 1, when present, is
    /// masked and reported as evidence.
    pub pattern: Regex,
    /// Severity reported for a match.
    pub severity: SecuritySeverity,
    /// Description copied into the finding.
    pub description: String,
}
76
/// A pattern for database connection strings that embed credentials.
#[derive(Debug, Clone)]
pub struct DatabasePattern {
    /// Database display name; used in the finding id (lower-cased) and title.
    pub database_type: String,
    /// Regex tested against each line (match / no match only).
    pub pattern: Regex,
    /// Severity reported for a match.
    pub severity: SecuritySeverity,
    /// Description copied into the finding.
    pub description: String,
}
85
/// A pattern describing one way Python code reads an environment variable.
#[derive(Debug, Clone)]
pub struct EnvVarPattern {
    /// Regex matched against each line; capture group 1 is the variable name.
    pub pattern: Regex,
    /// Severity attached to the pattern.
    pub severity: SecuritySeverity,
    /// Description of the pattern.
    pub description: String,
    /// Markers of a sensitive variable. Despite the name, they are matched
    /// anywhere inside the upper-cased variable name, not only at its start.
    pub sensitive_prefixes: Vec<String>,
}
94
95impl PythonSecurityAnalyzer {
96 pub fn new() -> Result<Self, SecurityError> {
97 Self::with_config(SecurityAnalysisConfig::default())
98 }
99
100 pub fn with_config(config: SecurityAnalysisConfig) -> Result<Self, SecurityError> {
101 let python_patterns = Self::initialize_python_patterns()?;
102 let framework_patterns = Self::initialize_framework_patterns()?;
103 let ai_ml_patterns = Self::initialize_ai_ml_patterns()?;
104 let cloud_patterns = Self::initialize_cloud_patterns()?;
105 let database_patterns = Self::initialize_database_patterns()?;
106 let env_var_patterns = Self::initialize_env_var_patterns()?;
107
108 Ok(Self {
109 config,
110 python_patterns,
111 framework_patterns,
112 ai_ml_patterns,
113 cloud_patterns,
114 database_patterns,
115 env_var_patterns,
116 gitignore_analyzer: None,
117 })
118 }
119
120 pub fn analyze_project(&mut self, project_root: &Path) -> Result<SecurityReport, SecurityError> {
122 let mut findings = Vec::new();
123
124 let mut gitignore_analyzer = GitIgnoreAnalyzer::new(project_root)
126 .map_err(|e| SecurityError::AnalysisFailed(format!("Failed to initialize gitignore analyzer: {}", e)))?;
127
128 info!("🔍 Using gitignore-aware security analysis for Python project at {}", project_root.display());
129
130 let python_extensions = ["py", "pyx", "pyi", "pyw"];
132 let python_files = gitignore_analyzer.get_files_to_analyze(&python_extensions)
133 .map_err(|e| SecurityError::Io(e))?
134 .into_iter()
135 .filter(|file| {
136 if let Some(ext) = file.extension().and_then(|e| e.to_str()) {
137 python_extensions.contains(&ext)
138 } else {
139 false
140 }
141 })
142 .collect::<Vec<_>>();
143
144 info!("Found {} Python files to analyze (gitignore-filtered)", python_files.len());
145
146 for file_path in &python_files {
148 let gitignore_status = gitignore_analyzer.analyze_file(file_path);
149 let mut file_findings = self.analyze_python_file(file_path)?;
150
151 for finding in &mut file_findings {
153 self.enhance_finding_with_gitignore_status(finding, &gitignore_status);
154 }
155
156 findings.extend(file_findings);
157 }
158
159 findings.extend(self.analyze_config_files_with_gitignore(project_root, &mut gitignore_analyzer)?);
161
162 findings.extend(self.analyze_env_files_with_gitignore(project_root, &mut gitignore_analyzer)?);
164
165 findings.extend(self.analyze_dependency_files_with_gitignore(project_root, &mut gitignore_analyzer)?);
167
168 let secret_files: Vec<PathBuf> = findings.iter()
170 .filter_map(|f| f.file_path.as_ref())
171 .cloned()
172 .collect();
173
174 let gitignore_recommendations = gitignore_analyzer.generate_gitignore_recommendations(&secret_files);
175
176 let mut report = SecurityReport::from_findings(findings);
178 report.recommendations.extend(gitignore_recommendations);
179
180 report.recommendations.extend(self.generate_python_security_recommendations());
182
183 Ok(report)
184 }
185
186 fn analyze_python_file(&self, file_path: &Path) -> Result<Vec<SecurityFinding>, SecurityError> {
188 let content = fs::read_to_string(file_path)?;
189 let mut findings = Vec::new();
190
191 for pattern in &self.python_patterns {
193 findings.extend(self.check_python_pattern_in_content(&content, pattern, file_path)?);
194 }
195
196 for pattern in &self.ai_ml_patterns {
198 findings.extend(self.check_ai_ml_pattern_in_content(&content, pattern, file_path)?);
199 }
200
201 for pattern in &self.cloud_patterns {
203 findings.extend(self.check_cloud_pattern_in_content(&content, pattern, file_path)?);
204 }
205
206 for pattern in &self.database_patterns {
208 findings.extend(self.check_database_pattern_in_content(&content, pattern, file_path)?);
209 }
210
211 let detected_framework = self.detect_python_framework(&content);
213 if let Some(framework) = detected_framework {
214 if let Some(framework_patterns) = self.framework_patterns.get(&framework) {
215 for pattern in framework_patterns {
216 findings.extend(self.check_framework_pattern_in_content(&content, pattern, file_path)?);
217 }
218 }
219 }
220
221 findings.extend(self.check_env_var_usage(&content, file_path)?);
223
224 findings.extend(self.check_insecure_python_practices(&content, file_path)?);
226
227 Ok(findings)
228 }
229
230 fn check_python_pattern_in_content(
232 &self,
233 content: &str,
234 pattern: &PythonSecretPattern,
235 file_path: &Path,
236 ) -> Result<Vec<SecurityFinding>, SecurityError> {
237 let mut findings = Vec::new();
238
239 for (line_num, line) in content.lines().enumerate() {
240 if let Some(captures) = pattern.pattern.captures(line) {
241 if pattern.false_positive_indicators.iter().any(|indicator| {
243 line.to_lowercase().contains(&indicator.to_lowercase())
244 }) {
245 debug!("Skipping potential false positive in {}: {}", file_path.display(), line.trim());
246 continue;
247 }
248
249 let (evidence, column_number) = if captures.len() > 1 {
251 if let Some(match_) = captures.get(1) {
252 (Some(self.mask_secret(match_.as_str())), Some(match_.start() + 1))
253 } else {
254 (Some(line.trim().to_string()), None)
255 }
256 } else {
257 if let Some(match_) = captures.get(0) {
258 (Some(line.trim().to_string()), Some(match_.start() + 1))
259 } else {
260 (Some(line.trim().to_string()), None)
261 }
262 };
263
264 let context_score = self.calculate_context_confidence(content, &pattern.context_indicators);
266 let adjusted_severity = self.adjust_severity_by_context(pattern.severity.clone(), context_score);
267
268 findings.push(SecurityFinding {
269 id: format!("{}-{}", pattern.id, line_num),
270 title: format!("{} Detected", pattern.name),
271 description: format!("{} (Context confidence: {:.1})", pattern.description, context_score),
272 severity: adjusted_severity,
273 category: SecurityCategory::SecretsExposure,
274 file_path: Some(file_path.to_path_buf()),
275 line_number: Some(line_num + 1),
276 column_number,
277 evidence,
278 remediation: pattern.remediation_hints.clone(),
279 references: vec![
280 "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(),
281 "https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html".to_string(),
282 "https://docs.python.org/3/library/os.html#os.environ".to_string(),
283 ],
284 cwe_id: Some("CWE-200".to_string()),
285 compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
286 });
287 }
288 }
289
290 Ok(findings)
291 }
292
293 fn check_ai_ml_pattern_in_content(
295 &self,
296 content: &str,
297 pattern: &AiMlPattern,
298 file_path: &Path,
299 ) -> Result<Vec<SecurityFinding>, SecurityError> {
300 let mut findings = Vec::new();
301
302 for (line_num, line) in content.lines().enumerate() {
303 if let Some(captures) = pattern.pattern.captures(line) {
304 let evidence = if captures.len() > 1 {
305 captures.get(1).map(|m| self.mask_secret(m.as_str()))
306 } else {
307 Some(line.trim().to_string())
308 };
309
310 let column_number = captures.get(0).map(|m| m.start() + 1);
311
312 findings.push(SecurityFinding {
313 id: format!("ai-ml-{}-{}", pattern.service.to_lowercase().replace(" ", "-"), line_num),
314 title: format!("{} API Key Detected", pattern.service),
315 description: format!("{} (Expected format: {})", pattern.description, pattern.api_key_format),
316 severity: pattern.severity.clone(),
317 category: SecurityCategory::SecretsExposure,
318 file_path: Some(file_path.to_path_buf()),
319 line_number: Some(line_num + 1),
320 column_number,
321 evidence,
322 remediation: vec![
323 format!("Store {} API key in environment variables", pattern.service),
324 "Use a secrets management service for production".to_string(),
325 "Implement API key rotation policies".to_string(),
326 "Monitor API key usage for anomalies".to_string(),
327 ],
328 references: vec![
329 "https://owasp.org/www-project-api-security/".to_string(),
330 format!("https://platform.openai.com/docs/quickstart/account-setup"),
331 ],
332 cwe_id: Some("CWE-798".to_string()),
333 compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
334 });
335 }
336 }
337
338 Ok(findings)
339 }
340
341 fn check_cloud_pattern_in_content(
343 &self,
344 content: &str,
345 pattern: &CloudPattern,
346 file_path: &Path,
347 ) -> Result<Vec<SecurityFinding>, SecurityError> {
348 let mut findings = Vec::new();
349
350 for (line_num, line) in content.lines().enumerate() {
351 if let Some(captures) = pattern.pattern.captures(line) {
352 let evidence = if captures.len() > 1 {
353 captures.get(1).map(|m| self.mask_secret(m.as_str()))
354 } else {
355 Some(line.trim().to_string())
356 };
357
358 let column_number = captures.get(0).map(|m| m.start() + 1);
359
360 findings.push(SecurityFinding {
361 id: format!("cloud-{}-{}-{}",
362 pattern.provider.to_lowercase(),
363 pattern.service.to_lowercase().replace(" ", "-"),
364 line_num),
365 title: format!("{} {} Detected", pattern.provider, pattern.service),
366 description: pattern.description.clone(),
367 severity: pattern.severity.clone(),
368 category: SecurityCategory::SecretsExposure,
369 file_path: Some(file_path.to_path_buf()),
370 line_number: Some(line_num + 1),
371 column_number,
372 evidence,
373 remediation: vec![
374 format!("Use {} managed identity or role-based access", pattern.provider),
375 "Store credentials in secure key management service".to_string(),
376 "Implement credential rotation policies".to_string(),
377 "Use least-privilege access principles".to_string(),
378 ],
379 references: vec![
380 "https://owasp.org/www-project-top-ten/2021/A07_2021-Identification_and_Authentication_Failures/".to_string(),
381 format!("https://docs.aws.amazon.com/security/"),
382 ],
383 cwe_id: Some("CWE-522".to_string()),
384 compliance_frameworks: vec!["SOC2".to_string(), "PCI-DSS".to_string()],
385 });
386 }
387 }
388
389 Ok(findings)
390 }
391
392 fn check_database_pattern_in_content(
394 &self,
395 content: &str,
396 pattern: &DatabasePattern,
397 file_path: &Path,
398 ) -> Result<Vec<SecurityFinding>, SecurityError> {
399 let mut findings = Vec::new();
400
401 for (line_num, line) in content.lines().enumerate() {
402 if pattern.pattern.is_match(line) {
403 let masked_line = self.mask_database_connection(line);
405
406 findings.push(SecurityFinding {
407 id: format!("database-{}-{}", pattern.database_type.to_lowercase(), line_num),
408 title: format!("{} Connection String with Credentials", pattern.database_type),
409 description: pattern.description.clone(),
410 severity: pattern.severity.clone(),
411 category: SecurityCategory::SecretsExposure,
412 file_path: Some(file_path.to_path_buf()),
413 line_number: Some(line_num + 1),
414 column_number: None,
415 evidence: Some(masked_line),
416 remediation: vec![
417 "Use environment variables for database credentials".to_string(),
418 "Implement connection pooling with credential management".to_string(),
419 "Use database authentication mechanisms like IAM roles".to_string(),
420 "Consider using encrypted connection strings".to_string(),
421 ],
422 references: vec![
423 "https://owasp.org/www-project-top-ten/2021/A07_2021-Identification_and_Authentication_Failures/".to_string(),
424 "https://cheatsheetseries.owasp.org/cheatsheets/Database_Security_Cheat_Sheet.html".to_string(),
425 ],
426 cwe_id: Some("CWE-798".to_string()),
427 compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string(), "PCI-DSS".to_string()],
428 });
429 }
430 }
431
432 Ok(findings)
433 }
434
435 fn check_framework_pattern_in_content(
437 &self,
438 content: &str,
439 pattern: &FrameworkPattern,
440 file_path: &Path,
441 ) -> Result<Vec<SecurityFinding>, SecurityError> {
442 let mut findings = Vec::new();
443
444 for (line_num, line) in content.lines().enumerate() {
445 if let Some(captures) = pattern.pattern.captures(line) {
446 let evidence = if captures.len() > 1 {
447 captures.get(1).map(|m| self.mask_secret(m.as_str()))
448 } else {
449 Some(line.trim().to_string())
450 };
451
452 findings.push(SecurityFinding {
453 id: format!("framework-{}-{}", pattern.framework.to_lowercase(), line_num),
454 title: format!("{} Security Issue", pattern.framework),
455 description: pattern.description.clone(),
456 severity: pattern.severity.clone(),
457 category: SecurityCategory::SecretsExposure,
458 file_path: Some(file_path.to_path_buf()),
459 line_number: Some(line_num + 1),
460 column_number: None,
461 evidence,
462 remediation: self.generate_framework_remediation(&pattern.framework),
463 references: vec![
464 format!("https://docs.djangoproject.com/en/stable/topics/security/"),
465 "https://owasp.org/www-project-top-ten/".to_string(),
466 ],
467 cwe_id: Some("CWE-200".to_string()),
468 compliance_frameworks: vec!["SOC2".to_string()],
469 });
470 }
471 }
472
473 Ok(findings)
474 }
475
    /// Builds the table of general Python secret patterns.
    ///
    /// The entries visible here cover a Django `SECRET_KEY` (critical), a Flask
    /// `app.secret_key` (high), JWT / secret key assignments (high), database
    /// URLs that embed credentials (critical) and generic API key / token
    /// assignments (high).
    ///
    /// # Errors
    /// Returns an error if any of the regular expressions fails to compile.
    fn initialize_python_patterns() -> Result<Vec<PythonSecretPattern>, SecurityError> {
        // NOTE(review): the next line appears to be truncated - the opening of
        // the `vec![` literal and the first entry's `id`, `name` and the start
        // of its `pattern` regex are not present. Restore them from version
        // control before building; do not guess the lost strings.
        let patterns = vec_+\-=\[\]{}|;:,.<>?/~`]{40,})["']"#)?,
                severity: SecuritySeverity::Critical,
                description: "Django SECRET_KEY found in source code".to_string(),
                context_indicators: vec!["django".to_string(), "settings".to_string(), "SECRET_KEY".to_string()],
                false_positive_indicators: vec!["example".to_string(), "your-secret-key".to_string(), "fake".to_string()],
                remediation_hints: vec![
                    "Move SECRET_KEY to environment variables".to_string(),
                    "Use python-decouple or similar library".to_string(),
                    "Never commit SECRET_KEY to version control".to_string(),
                ],
            },

            PythonSecretPattern {
                id: "python-flask-secret-key".to_string(),
                name: "Flask SECRET_KEY".to_string(),
                pattern: Regex::new(r#"(?i)app\.secret_key\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{20,})["']"#)?,
                severity: SecuritySeverity::High,
                description: "Flask SECRET_KEY hardcoded in application".to_string(),
                context_indicators: vec!["flask".to_string(), "app".to_string(), "secret_key".to_string()],
                false_positive_indicators: vec!["example".to_string(), "your-secret".to_string()],
                remediation_hints: vec![
                    "Use os.environ.get('SECRET_KEY')".to_string(),
                    "Store in environment variables".to_string(),
                ],
            },

            PythonSecretPattern {
                id: "python-fastapi-jwt-secret".to_string(),
                name: "FastAPI JWT Secret".to_string(),
                pattern: Regex::new(r#"(?i)(?:jwt_secret|jwt_key|secret_key)\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{20,})["']"#)?,
                severity: SecuritySeverity::High,
                description: "FastAPI JWT secret hardcoded in source".to_string(),
                context_indicators: vec!["fastapi".to_string(), "jwt".to_string(), "token".to_string()],
                false_positive_indicators: vec!["example".to_string(), "test".to_string()],
                remediation_hints: vec![
                    "Use Pydantic Settings for configuration".to_string(),
                    "Store JWT secrets in environment variables".to_string(),
                ],
            },

            PythonSecretPattern {
                id: "python-database-url".to_string(),
                name: "Database Connection String".to_string(),
                pattern: Regex::new(r#"(?i)(?:database_url|db_url|sqlalchemy_database_uri)\s*=\s*["'](?:postgresql|mysql|sqlite|mongodb)://[^"']*:[^"']*@[^"']+["']"#)?,
                severity: SecuritySeverity::Critical,
                description: "Database connection string with credentials detected".to_string(),
                context_indicators: vec!["database".to_string(), "sqlalchemy".to_string(), "connect".to_string()],
                false_positive_indicators: vec!["localhost".to_string(), "example.com".to_string(), "user:pass".to_string()],
                remediation_hints: vec![
                    "Use environment variables for database credentials".to_string(),
                    "Consider using connection pooling and secrets management".to_string(),
                ],
            },

            PythonSecretPattern {
                id: "python-api-key-assignment".to_string(),
                name: "API Key Assignment".to_string(),
                pattern: Regex::new(r#"(?i)(?:api_key|apikey|access_key|secret_key|private_key|auth_token|bearer_token)\s*=\s*["']([A-Za-z0-9_-]{20,})["']"#)?,
                severity: SecuritySeverity::High,
                description: "API key hardcoded in variable assignment".to_string(),
                context_indicators: vec!["requests".to_string(), "api".to_string(), "client".to_string()],
                false_positive_indicators: vec!["os.environ".to_string(), "config".to_string(), "settings".to_string()],
                remediation_hints: vec![
                    "Use environment variables or config files".to_string(),
                    "Consider using secrets management services".to_string(),
                ],
            },
        ];

        Ok(patterns)
    }
558
559 fn initialize_ai_ml_patterns() -> Result<Vec<AiMlPattern>, SecurityError> {
561 let patterns = vec![
562 AiMlPattern {
564 service: "OpenAI".to_string(),
565 pattern: Regex::new(r#"(?i)(?:openai[_-]?api[_-]?key|openai[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{32,})["']?"#)?,
566 severity: SecuritySeverity::Critical,
567 description: "OpenAI API key detected".to_string(),
568 api_key_format: "sk-[32+ alphanumeric characters]".to_string(),
569 },
570
571 AiMlPattern {
573 service: "OpenAI Organization".to_string(),
574 pattern: Regex::new(r#"(?i)(?:openai[_-]?org[_-]?id|openai[_-]?organization)\s*[=:]\s*["']?(org-[A-Za-z0-9]{20,})["']?"#)?,
575 severity: SecuritySeverity::Medium,
576 description: "OpenAI organization ID detected".to_string(),
577 api_key_format: "org-[20+ alphanumeric characters]".to_string(),
578 },
579
580 AiMlPattern {
582 service: "Anthropic Claude".to_string(),
583 pattern: Regex::new(r#"(?i)(?:anthropic[_-]?api[_-]?key|claude[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-ant-[A-Za-z0-9]{40,})["']?"#)?,
584 severity: SecuritySeverity::Critical,
585 description: "Anthropic Claude API key detected".to_string(),
586 api_key_format: "sk-ant-[40+ alphanumeric characters]".to_string(),
587 },
588
589 AiMlPattern {
591 service: "Hugging Face".to_string(),
592 pattern: Regex::new(r#"(?i)(?:huggingface[_-]?api[_-]?key|huggingface[_-]?token|hf[_-]?token)\s*[=:]\s*["']?(hf_[A-Za-z0-9]{30,})["']?"#)?,
593 severity: SecuritySeverity::High,
594 description: "Hugging Face API token detected".to_string(),
595 api_key_format: "hf_[30+ alphanumeric characters]".to_string(),
596 },
597
598 AiMlPattern {
600 service: "Google AI/Gemini".to_string(),
601 pattern: Regex::new(r#"(?i)(?:google[_-]?ai[_-]?api[_-]?key|gemini[_-]?api[_-]?key)\s*[=:]\s*["']?(AIza[A-Za-z0-9_-]{35,})["']?"#)?,
602 severity: SecuritySeverity::Critical,
603 description: "Google AI/Gemini API key detected".to_string(),
604 api_key_format: "AIza[35+ alphanumeric characters with underscores/dashes]".to_string(),
605 },
606
607 AiMlPattern {
609 service: "Cohere".to_string(),
610 pattern: Regex::new(r#"(?i)(?:cohere[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?,
611 severity: SecuritySeverity::High,
612 description: "Cohere API key detected".to_string(),
613 api_key_format: "[40+ alphanumeric characters]".to_string(),
614 },
615
616 AiMlPattern {
618 service: "Replicate".to_string(),
619 pattern: Regex::new(r#"(?i)(?:replicate[_-]?api[_-]?token|replicate[_-]?token)\s*[=:]\s*["']?(r8_[A-Za-z0-9]{30,})["']?"#)?,
620 severity: SecuritySeverity::High,
621 description: "Replicate API token detected".to_string(),
622 api_key_format: "r8_[30+ alphanumeric characters]".to_string(),
623 },
624
625 AiMlPattern {
627 service: "Stability AI".to_string(),
628 pattern: Regex::new(r#"(?i)(?:stability[_-]?ai[_-]?api[_-]?key|stable[_-]?diffusion[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{40,})["']?"#)?,
629 severity: SecuritySeverity::High,
630 description: "Stability AI API key detected".to_string(),
631 api_key_format: "sk-[40+ alphanumeric characters]".to_string(),
632 },
633
634 AiMlPattern {
636 service: "DeepSeek".to_string(),
637 pattern: Regex::new(r#"(?i)(?:deepseek[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{32,})["']?"#)?,
638 severity: SecuritySeverity::High,
639 description: "DeepSeek API key detected".to_string(),
640 api_key_format: "sk-[32+ alphanumeric characters]".to_string(),
641 },
642
643 AiMlPattern {
645 service: "Mistral AI".to_string(),
646 pattern: Regex::new(r#"(?i)(?:mistral[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{32,})["']?"#)?,
647 severity: SecuritySeverity::High,
648 description: "Mistral AI API key detected".to_string(),
649 api_key_format: "[32+ alphanumeric characters]".to_string(),
650 },
651
652 AiMlPattern {
654 service: "Together AI".to_string(),
655 pattern: Regex::new(r#"(?i)(?:together[_-]?ai[_-]?api[_-]?key|together[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?,
656 severity: SecuritySeverity::High,
657 description: "Together AI API key detected".to_string(),
658 api_key_format: "[40+ alphanumeric characters]".to_string(),
659 },
660
661 AiMlPattern {
663 service: "Weights & Biases".to_string(),
664 pattern: Regex::new(r#"(?i)(?:wandb[_-]?api[_-]?key|wandb[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?,
665 severity: SecuritySeverity::Medium,
666 description: "Weights & Biases API key detected".to_string(),
667 api_key_format: "[40+ alphanumeric characters]".to_string(),
668 },
669
670 AiMlPattern {
672 service: "MLflow".to_string(),
673 pattern: Regex::new(r#"(?i)(?:mlflow[_-]?tracking[_-]?username|mlflow[_-]?tracking[_-]?password)\s*[=:]\s*["']?([A-Za-z0-9]{8,})["']?"#)?,
674 severity: SecuritySeverity::Medium,
675 description: "MLflow tracking credentials detected".to_string(),
676 api_key_format: "[8+ alphanumeric characters]".to_string(),
677 },
678 ];
679
680 Ok(patterns)
681 }
682
683 fn initialize_cloud_patterns() -> Result<Vec<CloudPattern>, SecurityError> {
685 let patterns = vec![
686 CloudPattern {
688 provider: "AWS".to_string(),
689 service: "IAM Access Key".to_string(),
690 pattern: Regex::new(r#"(?i)(?:aws[_-]?access[_-]?key[_-]?id)\s*[=:]\s*["']?(AKIA[A-Z0-9]{16})["']?"#)?,
691 severity: SecuritySeverity::Critical,
692 description: "AWS Access Key ID detected".to_string(),
693 },
694
695 CloudPattern {
697 provider: "AWS".to_string(),
698 service: "IAM Secret Key".to_string(),
699 pattern: Regex::new(r#"(?i)(?:aws[_-]?secret[_-]?access[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{40})["']?"#)?,
700 severity: SecuritySeverity::Critical,
701 description: "AWS Secret Access Key detected".to_string(),
702 },
703
704 CloudPattern {
706 provider: "AWS".to_string(),
707 service: "Session Token".to_string(),
708 pattern: Regex::new(r#"(?i)(?:aws[_-]?session[_-]?token)\s*[=:]\s*["']?([A-Za-z0-9/+=]{100,})["']?"#)?,
709 severity: SecuritySeverity::High,
710 description: "AWS Session Token detected".to_string(),
711 },
712
713 CloudPattern {
715 provider: "GCP".to_string(),
716 service: "Service Account Key".to_string(),
717 pattern: Regex::new(r#"(?i)(?:google[_-]?application[_-]?credentials|gcp[_-]?service[_-]?account)\s*[=:]\s*["']?([A-Za-z0-9/+=]{50,})["']?"#)?,
718 severity: SecuritySeverity::Critical,
719 description: "Google Cloud Service Account key detected".to_string(),
720 },
721
722 CloudPattern {
724 provider: "Azure".to_string(),
725 service: "Storage Account Key".to_string(),
726 pattern: Regex::new(r#"(?i)(?:azure[_-]?storage[_-]?account[_-]?key|azure[_-]?storage[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{88})["']?"#)?,
727 severity: SecuritySeverity::Critical,
728 description: "Azure Storage Account key detected".to_string(),
729 },
730
731 CloudPattern {
733 provider: "Azure".to_string(),
734 service: "Service Principal".to_string(),
735 pattern: Regex::new(r#"(?i)(?:azure[_-]?client[_-]?secret|azure[_-]?tenant[_-]?id)\s*[=:]\s*["']?([A-Za-z0-9-]{32,})["']?"#)?,
736 severity: SecuritySeverity::Critical,
737 description: "Azure Service Principal credentials detected".to_string(),
738 },
739
740 CloudPattern {
742 provider: "DigitalOcean".to_string(),
743 service: "API Token".to_string(),
744 pattern: Regex::new(r#"(?i)(?:digitalocean[_-]?api[_-]?token|do[_-]?api[_-]?token)\s*[=:]\s*["']?(dop_v1_[A-Za-z0-9]{64})["']?"#)?,
745 severity: SecuritySeverity::High,
746 description: "DigitalOcean API token detected".to_string(),
747 },
748
749 CloudPattern {
751 provider: "Heroku".to_string(),
752 service: "API Key".to_string(),
753 pattern: Regex::new(r#"(?i)(?:heroku[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9-]{36})["']?"#)?,
754 severity: SecuritySeverity::High,
755 description: "Heroku API key detected".to_string(),
756 },
757
758 CloudPattern {
760 provider: "Stripe".to_string(),
761 service: "API Key".to_string(),
762 pattern: Regex::new(r#"(?i)(?:stripe[_-]?api[_-]?key|stripe[_-]?secret[_-]?key)\s*[=:]\s*["']?(sk_live_[A-Za-z0-9]{24}|sk_test_[A-Za-z0-9]{24})["']?"#)?,
763 severity: SecuritySeverity::Critical,
764 description: "Stripe API key detected".to_string(),
765 },
766
767 CloudPattern {
769 provider: "Twilio".to_string(),
770 service: "Auth Token".to_string(),
771 pattern: Regex::new(r#"(?i)(?:twilio[_-]?auth[_-]?token|twilio[_-]?account[_-]?sid)\s*[=:]\s*["']?([A-Za-z0-9]{32,34})["']?"#)?,
772 severity: SecuritySeverity::High,
773 description: "Twilio credentials detected".to_string(),
774 },
775 ];
776
777 Ok(patterns)
778 }
779
    /// Builds the framework-specific pattern tables, keyed by lower-case
    /// framework name (`"django"`, `"flask"`, `"fastapi"`).
    ///
    /// # Errors
    /// Returns an error if any of the regular expressions fails to compile.
    ///
    /// NOTE(review): the line that starts the `"flask"` entry appears to be
    /// truncated - the `vec![` opener, the `FrameworkPattern {` header, its
    /// `framework` field and the start of its regex are not present. Restore it
    /// from version control before building.
    ///
    /// NOTE(review): `check_framework_pattern_in_content` applies these regexes
    /// to one line at a time, so the Django `DATABASES = {...}` pattern and the
    /// FastAPI `class Settings(...)` pattern can presumably only match when the
    /// whole construct sits on a single line - confirm this is intended.
    fn initialize_framework_patterns() -> Result<HashMap<String, Vec<FrameworkPattern>>, SecurityError> {
        let mut frameworks = HashMap::new();

        // Django: database password inside a settings dict, and the e-mail
        // host password assignment.
        frameworks.insert("django".to_string(), vec![
            FrameworkPattern {
                framework: "Django".to_string(),
                pattern: Regex::new(r#"(?i)(?:database|databases)\s*=\s*\{[^}]*['"']password['"']\s*:\s*['"']([^'"']+)['"'][^}]*\}"#)?,
                severity: SecuritySeverity::Critical,
                description: "Django database password in settings".to_string(),
                file_extensions: vec!["py".to_string()],
            },
            FrameworkPattern {
                framework: "Django".to_string(),
                pattern: Regex::new(r#"(?i)email[_-]?host[_-]?password\s*=\s*["']([^"']+)["']"#)?,
                severity: SecuritySeverity::High,
                description: "Django email password in settings".to_string(),
                file_extensions: vec!["py".to_string()],
            },
        ]);

        frameworks.insert("flask".to_string(), vec[A-Z_]*)['"']\]\s*=\s*["']([^"']+)["']"#)?,
                severity: SecuritySeverity::High,
                description: "Flask configuration with potential secret".to_string(),
                file_extensions: vec!["py".to_string()],
            },
        ]);

        frameworks.insert("fastapi".to_string(), vec![
            FrameworkPattern {
                framework: "FastAPI".to_string(),
                pattern: Regex::new(r#"(?i)class\s+Settings\([^)]*\):[^}]*([A-Z_]*(?:SECRET|KEY|PASSWORD|TOKEN)[A-Z_]*)\s*:\s*str\s*=\s*["']([^"']+)["']"#)?,
                severity: SecuritySeverity::High,
                description: "FastAPI Settings class with hardcoded secret".to_string(),
                file_extensions: vec!["py".to_string()],
            },
        ]);

        Ok(frameworks)
    }
826
827 fn initialize_database_patterns() -> Result<Vec<DatabasePattern>, SecurityError> {
829 let patterns = vec![
830 DatabasePattern {
832 database_type: "PostgreSQL".to_string(),
833 pattern: Regex::new(r#"(?i)postgresql://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?,
834 severity: SecuritySeverity::Critical,
835 description: "PostgreSQL connection string with credentials".to_string(),
836 },
837
838 DatabasePattern {
840 database_type: "MySQL".to_string(),
841 pattern: Regex::new(r#"(?i)mysql://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?,
842 severity: SecuritySeverity::Critical,
843 description: "MySQL connection string with credentials".to_string(),
844 },
845
846 DatabasePattern {
848 database_type: "MongoDB".to_string(),
849 pattern: Regex::new(r#"(?i)mongodb://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?,
850 severity: SecuritySeverity::Critical,
851 description: "MongoDB connection string with credentials".to_string(),
852 },
853
854 DatabasePattern {
856 database_type: "Redis".to_string(),
857 pattern: Regex::new(r#"(?i)redis://[^:]*:[^@]+@[^/]+/[^"'\s]*"#)?,
858 severity: SecuritySeverity::High,
859 description: "Redis connection string with password".to_string(),
860 },
861
862 DatabasePattern {
864 database_type: "SQLAlchemy".to_string(),
865 pattern: Regex::new(r#"(?i)sqlalchemy_database_uri\s*=\s*["'][^"']*://[^:]+:[^@]+@[^"']+"#)?,
866 severity: SecuritySeverity::Critical,
867 description: "SQLAlchemy database URI with credentials".to_string(),
868 },
869 ];
870
871 Ok(patterns)
872 }
873
    /// Builds the patterns that recognize environment-variable access in
    /// Python code.
    ///
    /// Each entry carries the list of name fragments that mark a variable as
    /// sensitive; the first entry uses a longer list than the `getenv` entry.
    ///
    /// # Errors
    /// Returns an error if any of the regular expressions fails to compile.
    fn initialize_env_var_patterns() -> Result<Vec<EnvVarPattern>, SecurityError> {
        // NOTE(review): the next line appears to be truncated - the `vec![`
        // opener, the `EnvVarPattern {` header and the start of the first
        // entry's regex are not present. Restore them from version control
        // before building; do not guess the lost regex.
        let patterns = vec['"']\)"#)?,
                severity: SecuritySeverity::Info,
                description: "Environment variable usage detected".to_string(),
                sensitive_prefixes: vec![
                    "SECRET".to_string(),
                    "KEY".to_string(),
                    "PASSWORD".to_string(),
                    "TOKEN".to_string(),
                    "API".to_string(),
                    "AUTH".to_string(),
                    "PRIVATE".to_string(),
                    "CREDENTIAL".to_string(),
                ],
            },
            EnvVarPattern {
                pattern: Regex::new(r#"getenv\(['"']([A-Z_]+)['"']\)"#)?,
                severity: SecuritySeverity::Info,
                description: "Environment variable access via getenv".to_string(),
                sensitive_prefixes: vec![
                    "SECRET".to_string(),
                    "KEY".to_string(),
                    "PASSWORD".to_string(),
                    "TOKEN".to_string(),
                ],
            },
        ];

        Ok(patterns)
    }
907
908 fn check_env_var_usage(&self, content: &str, file_path: &Path) -> Result<Vec<SecurityFinding>, SecurityError> {
910 let mut findings = Vec::new();
911
912 for pattern in &self.env_var_patterns {
913 for (line_num, line) in content.lines().enumerate() {
914 if let Some(captures) = pattern.pattern.captures(line) {
915 if let Some(var_name) = captures.get(1) {
916 let var_name = var_name.as_str();
917
918 let is_sensitive = pattern.sensitive_prefixes.iter().any(|prefix| {
920 var_name.to_uppercase().contains(prefix)
921 });
922
923 if is_sensitive {
924 if !line.contains("=") || line.contains("os.environ") || line.contains("getenv") {
926 continue;
928 }
929
930 let column_number = captures.get(0).map(|m| m.start() + 1);
931
932 findings.push(SecurityFinding {
933 id: format!("env-var-misuse-{}", line_num),
934 title: "Potential Environment Variable Misuse".to_string(),
935 description: format!("Sensitive environment variable '{}' usage detected", var_name),
936 severity: SecuritySeverity::Medium,
937 category: SecurityCategory::SecretsExposure,
938 file_path: Some(file_path.to_path_buf()),
939 line_number: Some(line_num + 1),
940 column_number,
941 evidence: Some(line.trim().to_string()),
942 remediation: vec![
943 "Ensure sensitive environment variables are properly protected".to_string(),
944 "Use python-decouple or similar libraries for configuration".to_string(),
945 "Document required environment variables".to_string(),
946 ],
947 references: vec![
948 "https://12factor.net/config".to_string(),
949 "https://docs.python.org/3/library/os.html#os.environ".to_string(),
950 ],
951 cwe_id: Some("CWE-200".to_string()),
952 compliance_frameworks: vec!["SOC2".to_string()],
953 });
954 }
955 }
956 }
957 }
958 }
959
960 Ok(findings)
961 }
962
963 fn check_insecure_python_practices(&self, content: &str, file_path: &Path) -> Result<Vec<SecurityFinding>, SecurityError> {
965 let mut findings = Vec::new();
966
967 if let Ok(eval_pattern) = Regex::new(r#"eval\s*\("#) {
969 for (line_num, line) in content.lines().enumerate() {
970 if eval_pattern.is_match(line) {
971 findings.push(SecurityFinding {
972 id: format!("insecure-eval-{}", line_num),
973 title: "Dangerous eval() Usage".to_string(),
974 description: "Use of eval() function detected - potential code injection risk".to_string(),
975 severity: SecuritySeverity::High,
976 category: SecurityCategory::CodeInjection,
977 file_path: Some(file_path.to_path_buf()),
978 line_number: Some(line_num + 1),
979 column_number: None,
980 evidence: Some(line.trim().to_string()),
981 remediation: vec![
982 "Avoid using eval() with user input".to_string(),
983 "Use ast.literal_eval() for safe evaluation of literals".to_string(),
984 "Consider using json.loads() for JSON data".to_string(),
985 ],
986 references: vec![
987 "https://owasp.org/www-project-top-ten/2021/A03_2021-Injection/".to_string(),
988 ],
989 cwe_id: Some("CWE-95".to_string()),
990 compliance_frameworks: vec!["SOC2".to_string()],
991 });
992 }
993 }
994 }
995
996 if let Ok(subprocess_pattern) = Regex::new(r#"subprocess\.(call|run|Popen)\([^)]*shell\s*=\s*True"#) {
998 for (line_num, line) in content.lines().enumerate() {
999 if subprocess_pattern.is_match(line) {
1000 findings.push(SecurityFinding {
1001 id: format!("shell-injection-{}", line_num),
1002 title: "Potential Shell Injection".to_string(),
1003 description: "subprocess call with shell=True detected - potential command injection risk".to_string(),
1004 severity: SecuritySeverity::High,
1005 category: SecurityCategory::CommandInjection,
1006 file_path: Some(file_path.to_path_buf()),
1007 line_number: Some(line_num + 1),
1008 column_number: None,
1009 evidence: Some(line.trim().to_string()),
1010 remediation: vec![
1011 "Avoid using shell=True with user input".to_string(),
1012 "Use subprocess with list arguments instead".to_string(),
1013 "Validate and sanitize all user inputs".to_string(),
1014 ],
1015 references: vec![
1016 "https://owasp.org/www-project-top-ten/2021/A03_2021-Injection/".to_string(),
1017 ],
1018 cwe_id: Some("CWE-78".to_string()),
1019 compliance_frameworks: vec!["SOC2".to_string()],
1020 });
1021 }
1022 }
1023 }
1024
1025 Ok(findings)
1026 }
1027
1028 fn detect_python_framework(&self, content: &str) -> Option<String> {
1030 if content.contains("django") || content.contains("Django") {
1031 Some("django".to_string())
1032 } else if content.contains("flask") || content.contains("Flask") {
1033 Some("flask".to_string())
1034 } else if content.contains("fastapi") || content.contains("FastAPI") {
1035 Some("fastapi".to_string())
1036 } else {
1037 None
1038 }
1039 }
1040
1041 fn mask_secret(&self, secret: &str) -> String {
1043 if secret.len() <= 8 {
1044 "*".repeat(secret.len())
1045 } else {
1046 format!("{}***{}", &secret[..4], &secret[secret.len()-4..])
1047 }
1048 }
1049
1050 fn mask_database_connection(&self, connection_str: &str) -> String {
1052 if let Ok(re) = Regex::new(r"://([^:]+):([^@]+)@") {
1054 re.replace(connection_str, "://$1:***@").to_string()
1055 } else {
1056 connection_str.to_string()
1057 }
1058 }
1059
1060 fn calculate_context_confidence(&self, content: &str, indicators: &[String]) -> f32 {
1062 let total_indicators = indicators.len() as f32;
1063 if total_indicators == 0.0 {
1064 return 0.5; }
1066
1067 let found_indicators = indicators.iter()
1068 .filter(|indicator| content.to_lowercase().contains(&indicator.to_lowercase()))
1069 .count() as f32;
1070
1071 found_indicators / total_indicators
1072 }
1073
1074 fn adjust_severity_by_context(&self, base_severity: SecuritySeverity, confidence: f32) -> SecuritySeverity {
1076 match base_severity {
1077 SecuritySeverity::Critical => base_severity, SecuritySeverity::High => {
1079 if confidence < 0.3 {
1080 SecuritySeverity::Medium
1081 } else {
1082 base_severity
1083 }
1084 }
1085 SecuritySeverity::Medium => {
1086 if confidence > 0.7 {
1087 SecuritySeverity::High
1088 } else if confidence < 0.3 {
1089 SecuritySeverity::Low
1090 } else {
1091 base_severity
1092 }
1093 }
1094 _ => base_severity,
1095 }
1096 }
1097
1098 fn generate_framework_remediation(&self, framework: &str) -> Vec<String> {
1100 match framework.to_lowercase().as_str() {
1101 "django" => vec![
1102 "Use Django's built-in security features".to_string(),
1103 "Store SECRET_KEY in environment variables".to_string(),
1104 "Use django-environ for configuration management".to_string(),
1105 "Enable Django's security middleware".to_string(),
1106 ],
1107 "flask" => vec![
1108 "Use Flask-Security for authentication".to_string(),
1109 "Store secrets in environment variables".to_string(),
1110 "Use Flask-Talisman for security headers".to_string(),
1111 "Implement proper session management".to_string(),
1112 ],
1113 "fastapi" => vec![
1114 "Use Pydantic Settings for configuration".to_string(),
1115 "Implement proper JWT token management".to_string(),
1116 "Use dependency injection for secrets".to_string(),
1117 "Enable HTTPS and security headers".to_string(),
1118 ],
1119 _ => vec![
1120 "Follow framework-specific security best practices".to_string(),
1121 "Use environment variables for sensitive data".to_string(),
1122 ],
1123 }
1124 }
1125
1126 fn enhance_finding_with_gitignore_status(
1128 &self,
1129 finding: &mut SecurityFinding,
1130 gitignore_status: &super::gitignore::GitIgnoreStatus,
1131 ) {
1132 finding.severity = match gitignore_status.risk_level {
1134 GitIgnoreRisk::Tracked => SecuritySeverity::Critical, GitIgnoreRisk::Exposed => {
1136 match &finding.severity {
1138 SecuritySeverity::Medium => SecuritySeverity::High,
1139 SecuritySeverity::Low => SecuritySeverity::Medium,
1140 other => other.clone(),
1141 }
1142 }
1143 GitIgnoreRisk::Protected => {
1144 match &finding.severity {
1146 SecuritySeverity::Critical => SecuritySeverity::High,
1147 SecuritySeverity::High => SecuritySeverity::Medium,
1148 other => other.clone(),
1149 }
1150 }
1151 GitIgnoreRisk::Safe => finding.severity.clone(),
1152 };
1153
1154 finding.description.push_str(&format!(" (GitIgnore: {})", gitignore_status.description()));
1156
1157 if gitignore_status.risk_level == GitIgnoreRisk::Tracked {
1159 finding.remediation.insert(0, "⚠️ CRITICAL: Remove this file from git history using git-filter-branch or BFG Repo-Cleaner".to_string());
1160 finding.remediation.insert(1, "🔑 Rotate any exposed secrets immediately".to_string());
1161 }
1162 }
1163
1164 fn analyze_config_files_with_gitignore(
1166 &self,
1167 project_root: &Path,
1168 gitignore_analyzer: &mut GitIgnoreAnalyzer,
1169 ) -> Result<Vec<SecurityFinding>, SecurityError> {
1170 let mut findings = Vec::new();
1171
1172 let config_files = [
1174 "settings.py", "config.py", "main.py", "app.py", "manage.py", "wsgi.py", "asgi.py", ];
1182
1183 for config_file in &config_files {
1184 let config_path = project_root.join(config_file);
1185 if config_path.exists() {
1186 let gitignore_status = gitignore_analyzer.analyze_file(&config_path);
1187
1188 if let Ok(content) = fs::read_to_string(&config_path) {
1189 if self.contains_potential_python_secrets(&content) {
1191 let mut finding = SecurityFinding {
1192 id: format!("config-file-{}", config_file.replace('.', "-")),
1193 title: "Potential Secrets in Python Configuration File".to_string(),
1194 description: format!("Python configuration file '{}' may contain secrets", config_file),
1195 severity: SecuritySeverity::Medium,
1196 category: SecurityCategory::SecretsExposure,
1197 file_path: Some(config_path.clone()),
1198 line_number: None,
1199 column_number: None,
1200 evidence: None,
1201 remediation: vec![
1202 "Review configuration file for hardcoded secrets".to_string(),
1203 "Use environment variables for sensitive configuration".to_string(),
1204 "Consider using python-decouple or similar libraries".to_string(),
1205 ],
1206 references: vec![
1207 "https://12factor.net/config".to_string(),
1208 ],
1209 cwe_id: Some("CWE-200".to_string()),
1210 compliance_frameworks: vec!["SOC2".to_string()],
1211 };
1212
1213 self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status);
1214 findings.push(finding);
1215 }
1216 }
1217 }
1218 }
1219
1220 Ok(findings)
1221 }
1222
1223 fn analyze_dependency_files_with_gitignore(
1225 &self,
1226 project_root: &Path,
1227 gitignore_analyzer: &mut GitIgnoreAnalyzer,
1228 ) -> Result<Vec<SecurityFinding>, SecurityError> {
1229 let mut findings = Vec::new();
1230
1231 let dependency_files = [
1233 "requirements.txt",
1234 "requirements-dev.txt",
1235 "requirements-prod.txt",
1236 "Pipfile",
1237 "Pipfile.lock",
1238 "pyproject.toml",
1239 "poetry.lock",
1240 "conda-requirements.txt",
1241 "environment.yml",
1242 ];
1243
1244 for dep_file in &dependency_files {
1245 let dep_path = project_root.join(dep_file);
1246 if dep_path.exists() {
1247 let gitignore_status = gitignore_analyzer.analyze_file(&dep_path);
1248
1249 if let Ok(content) = fs::read_to_string(&dep_path) {
1251 if self.contains_potential_python_secrets(&content) {
1252 let mut finding = SecurityFinding {
1253 id: format!("dependency-file-{}", dep_file.replace('.', "-").replace('-', "_")),
1254 title: "Potential Secrets in Python Dependency File".to_string(),
1255 description: format!("Python dependency file '{}' may contain secrets", dep_file),
1256 severity: SecuritySeverity::High,
1257 category: SecurityCategory::SecretsExposure,
1258 file_path: Some(dep_path.clone()),
1259 line_number: None,
1260 column_number: None,
1261 evidence: None,
1262 remediation: vec![
1263 "Remove any secrets from dependency files".to_string(),
1264 "Use environment variables for configuration".to_string(),
1265 "Review dependency sources for security".to_string(),
1266 ],
1267 references: vec![
1268 "https://pip.pypa.io/en/stable/topics/secure-installs/".to_string(),
1269 ],
1270 cwe_id: Some("CWE-200".to_string()),
1271 compliance_frameworks: vec!["SOC2".to_string()],
1272 };
1273
1274 self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status);
1275 findings.push(finding);
1276 }
1277 }
1278 }
1279 }
1280
1281 Ok(findings)
1282 }
1283
1284 fn analyze_env_files_with_gitignore(
1286 &self,
1287 project_root: &Path,
1288 gitignore_analyzer: &mut GitIgnoreAnalyzer,
1289 ) -> Result<Vec<SecurityFinding>, SecurityError> {
1290 let mut findings = Vec::new();
1291
1292 let env_files = gitignore_analyzer.get_files_to_analyze(&[])
1294 .map_err(|e| SecurityError::Io(e))?
1295 .into_iter()
1296 .filter(|file| {
1297 if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) {
1298 if self.is_template_file(file_name) {
1300 debug!("Skipping template file: {}", file_name);
1301 return false;
1302 }
1303
1304 file_name.starts_with(".env") ||
1305 file_name.contains("credentials") ||
1306 file_name.contains("secrets") ||
1307 file_name.ends_with(".key") ||
1308 file_name.ends_with(".pem") ||
1309 file_name == "secret.json" ||
1310 file_name == "service-account.json"
1311 } else {
1312 false
1313 }
1314 })
1315 .collect::<Vec<_>>();
1316
1317 for env_file in env_files {
1318 let gitignore_status = gitignore_analyzer.analyze_file(&env_file);
1319 let relative_path = env_file.strip_prefix(project_root)
1320 .unwrap_or(&env_file);
1321
1322 let (severity, title, description) = match gitignore_status.risk_level {
1324 GitIgnoreRisk::Tracked => (
1325 SecuritySeverity::Critical,
1326 "Python Secret File Tracked by Git".to_string(),
1327 format!("Python secret file '{}' is tracked by git and may expose credentials in version history", relative_path.display()),
1328 ),
1329 GitIgnoreRisk::Exposed => (
1330 SecuritySeverity::High,
1331 "Python Secret File Not in GitIgnore".to_string(),
1332 format!("Python secret file '{}' exists but is not protected by .gitignore", relative_path.display()),
1333 ),
1334 GitIgnoreRisk::Protected => (
1335 SecuritySeverity::Info,
1336 "Python Secret File Properly Protected".to_string(),
1337 format!("Python secret file '{}' is properly ignored but detected for verification", relative_path.display()),
1338 ),
1339 GitIgnoreRisk::Safe => continue, };
1341
1342 let mut finding = SecurityFinding {
1343 id: format!("python-env-file-{}", relative_path.to_string_lossy().replace('/', "-").replace('.', "-")),
1344 title,
1345 description,
1346 severity,
1347 category: SecurityCategory::SecretsExposure,
1348 file_path: Some(env_file.clone()),
1349 line_number: None,
1350 column_number: None,
1351 evidence: None,
1352 remediation: vec![
1353 "Ensure sensitive files are in .gitignore".to_string(),
1354 "Use .env.example files for documentation".to_string(),
1355 "Never commit actual environment files to version control".to_string(),
1356 "Use python-decouple for environment variable management".to_string(),
1357 ],
1358 references: vec![
1359 "https://github.com/motdotla/dotenv#should-i-commit-my-env-file".to_string(),
1360 "https://pypi.org/project/python-decouple/".to_string(),
1361 ],
1362 cwe_id: Some("CWE-200".to_string()),
1363 compliance_frameworks: vec!["SOC2".to_string()],
1364 };
1365
1366 self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status);
1367 findings.push(finding);
1368 }
1369
1370 Ok(findings)
1371 }
1372
1373 fn is_template_file(&self, file_name: &str) -> bool {
1375 let template_indicators = [
1376 "sample", "example", "template", "template.env", "env.template",
1377 "sample.env", "env.sample", "example.env", "env.example",
1378 "examples", "samples", "templates", "demo", "test",
1379 ".env.sample", ".env.example", ".env.template", ".env.demo", ".env.test",
1380 "example.json", "sample.json", "template.json"
1381 ];
1382
1383 let file_name_lower = file_name.to_lowercase();
1384
1385 template_indicators.iter().any(|indicator| {
1387 file_name_lower == *indicator ||
1388 file_name_lower.contains(indicator) ||
1389 file_name_lower.ends_with(indicator)
1390 })
1391 }
1392
1393 fn contains_potential_python_secrets(&self, content: &str) -> bool {
1395 let secret_indicators = [
1396 "sk_", "pk_live_", "eyJ", "AKIA", "-----BEGIN",
1397 "client_secret", "api_key", "access_token", "SECRET_KEY",
1398 "private_key", "secret_key", "bearer", "password",
1399 "token", "credentials", "auth"
1400 ];
1401
1402 let content_lower = content.to_lowercase();
1403 secret_indicators.iter().any(|indicator| content_lower.contains(&indicator.to_lowercase()))
1404 }
1405
1406 fn generate_python_security_recommendations(&self) -> Vec<String> {
1408 vec![
1409 "🐍 Python Security Best Practices:".to_string(),
1410 " • Use environment variables for all secrets and configuration".to_string(),
1411 " • Install python-decouple or python-dotenv for configuration management".to_string(),
1412 " • Keep requirements.txt and poetry.lock files up to date".to_string(),
1413 " • Use virtual environments to isolate dependencies".to_string(),
1414 " • Run 'pip-audit' or 'safety check' to scan for vulnerable packages".to_string(),
1415 " • Enable Django's security middleware if using Django".to_string(),
1416 " • Use parameterized queries to prevent SQL injection".to_string(),
1417 " • Validate and sanitize all user inputs".to_string(),
1418 " • Use HTTPS in production environments".to_string(),
1419 " • Implement proper error handling and logging".to_string(),
1420 " • Consider using tools like bandit for static security analysis".to_string(),
1421 ]
1422 }
1423}