syncable_cli/analyzer/security/
python.rs

1//! # Python Security Analyzer
2//! 
3//! Specialized security analyzer for Python applications.
4//! 
5//! This analyzer focuses on:
6//! - Python web frameworks (Django, Flask, FastAPI, etc.)
7//! - AI/ML services and tools (OpenAI, Anthropic, Hugging Face, etc.)
8//! - Cloud services commonly used with Python (AWS, GCP, Azure)
9//! - Database connections and ORMs (SQLAlchemy, Django ORM, etc.)
10//! - Environment variable misuse in Python applications
11//! - Common Python anti-patterns and secret exposure patterns
12//! - Python package managers and dependency files
13
14use std::collections::HashMap;
15use std::path::{Path, PathBuf};
16use std::fs;
17use regex::Regex;
18use log::{debug, info, warn};
19
20use super::{SecurityError, SecurityFinding, SecuritySeverity, SecurityCategory, SecurityReport, SecurityAnalysisConfig, GitIgnoreAnalyzer, GitIgnoreRisk};
21
22/// Python-specific security analyzer
23pub struct PythonSecurityAnalyzer {
24    config: SecurityAnalysisConfig,
25    python_patterns: Vec<PythonSecretPattern>,
26    framework_patterns: HashMap<String, Vec<FrameworkPattern>>,
27    ai_ml_patterns: Vec<AiMlPattern>,
28    cloud_patterns: Vec<CloudPattern>,
29    database_patterns: Vec<DatabasePattern>,
30    env_var_patterns: Vec<EnvVarPattern>,
31    gitignore_analyzer: Option<GitIgnoreAnalyzer>,
32}
33
34/// Python-specific secret pattern
35#[derive(Debug, Clone)]
36pub struct PythonSecretPattern {
37    pub id: String,
38    pub name: String,
39    pub pattern: Regex,
40    pub severity: SecuritySeverity,
41    pub description: String,
42    pub context_indicators: Vec<String>,
43    pub false_positive_indicators: Vec<String>,
44    pub remediation_hints: Vec<String>,
45}
46
47/// Framework-specific patterns for Python web frameworks
48#[derive(Debug, Clone)]
49pub struct FrameworkPattern {
50    pub framework: String,
51    pub pattern: Regex,
52    pub severity: SecuritySeverity,
53    pub description: String,
54    pub file_extensions: Vec<String>,
55}
56
57/// AI/ML service patterns
58#[derive(Debug, Clone)]
59pub struct AiMlPattern {
60    pub service: String,
61    pub pattern: Regex,
62    pub severity: SecuritySeverity,
63    pub description: String,
64    pub api_key_format: String,
65}
66
67/// Cloud service patterns
68#[derive(Debug, Clone)]
69pub struct CloudPattern {
70    pub provider: String,
71    pub service: String,
72    pub pattern: Regex,
73    pub severity: SecuritySeverity,
74    pub description: String,
75}
76
77/// Database connection patterns
78#[derive(Debug, Clone)]
79pub struct DatabasePattern {
80    pub database_type: String,
81    pub pattern: Regex,
82    pub severity: SecuritySeverity,
83    pub description: String,
84}
85
86/// Environment variable patterns specific to Python
87#[derive(Debug, Clone)]
88pub struct EnvVarPattern {
89    pub pattern: Regex,
90    pub severity: SecuritySeverity,
91    pub description: String,
92    pub sensitive_prefixes: Vec<String>,
93}
94
95impl PythonSecurityAnalyzer {
96    pub fn new() -> Result<Self, SecurityError> {
97        Self::with_config(SecurityAnalysisConfig::default())
98    }
99    
100    pub fn with_config(config: SecurityAnalysisConfig) -> Result<Self, SecurityError> {
101        let python_patterns = Self::initialize_python_patterns()?;
102        let framework_patterns = Self::initialize_framework_patterns()?;
103        let ai_ml_patterns = Self::initialize_ai_ml_patterns()?;
104        let cloud_patterns = Self::initialize_cloud_patterns()?;
105        let database_patterns = Self::initialize_database_patterns()?;
106        let env_var_patterns = Self::initialize_env_var_patterns()?;
107        
108        Ok(Self {
109            config,
110            python_patterns,
111            framework_patterns,
112            ai_ml_patterns,
113            cloud_patterns,
114            database_patterns,
115            env_var_patterns,
116            gitignore_analyzer: None,
117        })
118    }
119    
120    /// Analyze a Python project for security vulnerabilities
121    pub fn analyze_project(&mut self, project_root: &Path) -> Result<SecurityReport, SecurityError> {
122        let mut findings = Vec::new();
123        
124        // Initialize gitignore analyzer for comprehensive file protection assessment
125        let mut gitignore_analyzer = GitIgnoreAnalyzer::new(project_root)
126            .map_err(|e| SecurityError::AnalysisFailed(format!("Failed to initialize gitignore analyzer: {}", e)))?;
127        
128        info!("🔍 Using gitignore-aware security analysis for Python project at {}", project_root.display());
129        
130        // Get Python files using gitignore-aware collection
131        let python_extensions = ["py", "pyx", "pyi", "pyw"];
132        let python_files = gitignore_analyzer.get_files_to_analyze(&python_extensions)
133            .map_err(|e| SecurityError::Io(e))?
134            .into_iter()
135            .filter(|file| {
136                if let Some(ext) = file.extension().and_then(|e| e.to_str()) {
137                    python_extensions.contains(&ext)
138                } else {
139                    false
140                }
141            })
142            .collect::<Vec<_>>();
143        
144        info!("Found {} Python files to analyze (gitignore-filtered)", python_files.len());
145        
146        // Analyze each Python file with gitignore context
147        for file_path in &python_files {
148            let gitignore_status = gitignore_analyzer.analyze_file(file_path);
149            let mut file_findings = self.analyze_python_file(file_path)?;
150            
151            // Enhance findings with gitignore risk assessment
152            for finding in &mut file_findings {
153                self.enhance_finding_with_gitignore_status(finding, &gitignore_status);
154            }
155            
156            findings.extend(file_findings);
157        }
158        
159        // Analyze Python configuration files with gitignore awareness
160        findings.extend(self.analyze_config_files_with_gitignore(project_root, &mut gitignore_analyzer)?);
161        
162        // Comprehensive environment file analysis with gitignore risk assessment
163        findings.extend(self.analyze_env_files_with_gitignore(project_root, &mut gitignore_analyzer)?);
164        
165        // Analyze Python-specific dependency files
166        findings.extend(self.analyze_dependency_files_with_gitignore(project_root, &mut gitignore_analyzer)?);
167        
168        // Generate gitignore recommendations for any secret files found
169        let secret_files: Vec<PathBuf> = findings.iter()
170            .filter_map(|f| f.file_path.as_ref())
171            .cloned()
172            .collect();
173        
174        let gitignore_recommendations = gitignore_analyzer.generate_gitignore_recommendations(&secret_files);
175        
176        // Create report with enhanced recommendations
177        let mut report = SecurityReport::from_findings(findings);
178        report.recommendations.extend(gitignore_recommendations);
179        
180        // Add Python-specific security recommendations
181        report.recommendations.extend(self.generate_python_security_recommendations());
182        
183        Ok(report)
184    }
185    
186    /// Analyze a single Python file for security vulnerabilities
187    fn analyze_python_file(&self, file_path: &Path) -> Result<Vec<SecurityFinding>, SecurityError> {
188        let content = fs::read_to_string(file_path)?;
189        let mut findings = Vec::new();
190        
191        // Check against Python-specific patterns
192        for pattern in &self.python_patterns {
193            findings.extend(self.check_python_pattern_in_content(&content, pattern, file_path)?);
194        }
195        
196        // Check against AI/ML service patterns
197        for pattern in &self.ai_ml_patterns {
198            findings.extend(self.check_ai_ml_pattern_in_content(&content, pattern, file_path)?);
199        }
200        
201        // Check against cloud service patterns
202        for pattern in &self.cloud_patterns {
203            findings.extend(self.check_cloud_pattern_in_content(&content, pattern, file_path)?);
204        }
205        
206        // Check against database patterns
207        for pattern in &self.database_patterns {
208            findings.extend(self.check_database_pattern_in_content(&content, pattern, file_path)?);
209        }
210        
211        // Check framework-specific patterns based on file content
212        let detected_framework = self.detect_python_framework(&content);
213        if let Some(framework) = detected_framework {
214            if let Some(framework_patterns) = self.framework_patterns.get(&framework) {
215                for pattern in framework_patterns {
216                    findings.extend(self.check_framework_pattern_in_content(&content, pattern, file_path)?);
217                }
218            }
219        }
220        
221        // Check environment variable usage
222        findings.extend(self.check_env_var_usage(&content, file_path)?);
223        
224        // Check for insecure Python practices
225        findings.extend(self.check_insecure_python_practices(&content, file_path)?);
226        
227        Ok(findings)
228    }
229    
230    /// Check a Python-specific pattern in file content
231    fn check_python_pattern_in_content(
232        &self,
233        content: &str,
234        pattern: &PythonSecretPattern,
235        file_path: &Path,
236    ) -> Result<Vec<SecurityFinding>, SecurityError> {
237        let mut findings = Vec::new();
238        
239        for (line_num, line) in content.lines().enumerate() {
240            if let Some(captures) = pattern.pattern.captures(line) {
241                // Check for false positive indicators
242                if pattern.false_positive_indicators.iter().any(|indicator| {
243                    line.to_lowercase().contains(&indicator.to_lowercase())
244                }) {
245                    debug!("Skipping potential false positive in {}: {}", file_path.display(), line.trim());
246                    continue;
247                }
248                
249                // Extract the secret value and position if captured
250                let (evidence, column_number) = if captures.len() > 1 {
251                    if let Some(match_) = captures.get(1) {
252                        (Some(self.mask_secret(match_.as_str())), Some(match_.start() + 1))
253                    } else {
254                        (Some(line.trim().to_string()), None)
255                    }
256                } else {
257                    if let Some(match_) = captures.get(0) {
258                        (Some(line.trim().to_string()), Some(match_.start() + 1))
259                    } else {
260                        (Some(line.trim().to_string()), None)
261                    }
262                };
263                
264                // Check context for confidence scoring
265                let context_score = self.calculate_context_confidence(content, &pattern.context_indicators);
266                let adjusted_severity = self.adjust_severity_by_context(pattern.severity.clone(), context_score);
267                
268                findings.push(SecurityFinding {
269                    id: format!("{}-{}", pattern.id, line_num),
270                    title: format!("{} Detected", pattern.name),
271                    description: format!("{} (Context confidence: {:.1})", pattern.description, context_score),
272                    severity: adjusted_severity,
273                    category: SecurityCategory::SecretsExposure,
274                    file_path: Some(file_path.to_path_buf()),
275                    line_number: Some(line_num + 1),
276                    column_number,
277                    evidence,
278                    remediation: pattern.remediation_hints.clone(),
279                    references: vec![
280                        "https://owasp.org/www-project-top-ten/2021/A05_2021-Security_Misconfiguration/".to_string(),
281                        "https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html".to_string(),
282                        "https://docs.python.org/3/library/os.html#os.environ".to_string(),
283                    ],
284                    cwe_id: Some("CWE-200".to_string()),
285                    compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
286                });
287            }
288        }
289        
290        Ok(findings)
291    }
292    
293    /// Check AI/ML service patterns
294    fn check_ai_ml_pattern_in_content(
295        &self,
296        content: &str,
297        pattern: &AiMlPattern,
298        file_path: &Path,
299    ) -> Result<Vec<SecurityFinding>, SecurityError> {
300        let mut findings = Vec::new();
301        
302        for (line_num, line) in content.lines().enumerate() {
303            if let Some(captures) = pattern.pattern.captures(line) {
304                let evidence = if captures.len() > 1 {
305                    captures.get(1).map(|m| self.mask_secret(m.as_str()))
306                } else {
307                    Some(line.trim().to_string())
308                };
309                
310                let column_number = captures.get(0).map(|m| m.start() + 1);
311                
312                findings.push(SecurityFinding {
313                    id: format!("ai-ml-{}-{}", pattern.service.to_lowercase().replace(" ", "-"), line_num),
314                    title: format!("{} API Key Detected", pattern.service),
315                    description: format!("{} (Expected format: {})", pattern.description, pattern.api_key_format),
316                    severity: pattern.severity.clone(),
317                    category: SecurityCategory::SecretsExposure,
318                    file_path: Some(file_path.to_path_buf()),
319                    line_number: Some(line_num + 1),
320                    column_number,
321                    evidence,
322                    remediation: vec![
323                        format!("Store {} API key in environment variables", pattern.service),
324                        "Use a secrets management service for production".to_string(),
325                        "Implement API key rotation policies".to_string(),
326                        "Monitor API key usage for anomalies".to_string(),
327                    ],
328                    references: vec![
329                        "https://owasp.org/www-project-api-security/".to_string(),
330                        format!("https://platform.openai.com/docs/quickstart/account-setup"),
331                    ],
332                    cwe_id: Some("CWE-798".to_string()),
333                    compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
334                });
335            }
336        }
337        
338        Ok(findings)
339    }
340    
341    /// Check cloud service patterns
342    fn check_cloud_pattern_in_content(
343        &self,
344        content: &str,
345        pattern: &CloudPattern,
346        file_path: &Path,
347    ) -> Result<Vec<SecurityFinding>, SecurityError> {
348        let mut findings = Vec::new();
349        
350        for (line_num, line) in content.lines().enumerate() {
351            if let Some(captures) = pattern.pattern.captures(line) {
352                let evidence = if captures.len() > 1 {
353                    captures.get(1).map(|m| self.mask_secret(m.as_str()))
354                } else {
355                    Some(line.trim().to_string())
356                };
357                
358                let column_number = captures.get(0).map(|m| m.start() + 1);
359                
360                findings.push(SecurityFinding {
361                    id: format!("cloud-{}-{}-{}", 
362                              pattern.provider.to_lowercase(),
363                              pattern.service.to_lowercase().replace(" ", "-"),
364                              line_num),
365                    title: format!("{} {} Detected", pattern.provider, pattern.service),
366                    description: pattern.description.clone(),
367                    severity: pattern.severity.clone(),
368                    category: SecurityCategory::SecretsExposure,
369                    file_path: Some(file_path.to_path_buf()),
370                    line_number: Some(line_num + 1),
371                    column_number,
372                    evidence,
373                    remediation: vec![
374                        format!("Use {} managed identity or role-based access", pattern.provider),
375                        "Store credentials in secure key management service".to_string(),
376                        "Implement credential rotation policies".to_string(),
377                        "Use least-privilege access principles".to_string(),
378                    ],
379                    references: vec![
380                        "https://owasp.org/www-project-top-ten/2021/A07_2021-Identification_and_Authentication_Failures/".to_string(),
381                        format!("https://docs.aws.amazon.com/security/"),
382                    ],
383                    cwe_id: Some("CWE-522".to_string()),
384                    compliance_frameworks: vec!["SOC2".to_string(), "PCI-DSS".to_string()],
385                });
386            }
387        }
388        
389        Ok(findings)
390    }
391    
392    /// Check database patterns
393    fn check_database_pattern_in_content(
394        &self,
395        content: &str,
396        pattern: &DatabasePattern,
397        file_path: &Path,
398    ) -> Result<Vec<SecurityFinding>, SecurityError> {
399        let mut findings = Vec::new();
400        
401        for (line_num, line) in content.lines().enumerate() {
402            if pattern.pattern.is_match(line) {
403                // Mask the connection string for evidence
404                let masked_line = self.mask_database_connection(line);
405                
406                findings.push(SecurityFinding {
407                    id: format!("database-{}-{}", pattern.database_type.to_lowercase(), line_num),
408                    title: format!("{} Connection String with Credentials", pattern.database_type),
409                    description: pattern.description.clone(),
410                    severity: pattern.severity.clone(),
411                    category: SecurityCategory::SecretsExposure,
412                    file_path: Some(file_path.to_path_buf()),
413                    line_number: Some(line_num + 1),
414                    column_number: None,
415                    evidence: Some(masked_line),
416                    remediation: vec![
417                        "Use environment variables for database credentials".to_string(),
418                        "Implement connection pooling with credential management".to_string(),
419                        "Use database authentication mechanisms like IAM roles".to_string(),
420                        "Consider using encrypted connection strings".to_string(),
421                    ],
422                    references: vec![
423                        "https://owasp.org/www-project-top-ten/2021/A07_2021-Identification_and_Authentication_Failures/".to_string(),
424                        "https://cheatsheetseries.owasp.org/cheatsheets/Database_Security_Cheat_Sheet.html".to_string(),
425                    ],
426                    cwe_id: Some("CWE-798".to_string()),
427                    compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string(), "PCI-DSS".to_string()],
428                });
429            }
430        }
431        
432        Ok(findings)
433    }
434    
435    /// Check framework-specific patterns
436    fn check_framework_pattern_in_content(
437        &self,
438        content: &str,
439        pattern: &FrameworkPattern,
440        file_path: &Path,
441    ) -> Result<Vec<SecurityFinding>, SecurityError> {
442        let mut findings = Vec::new();
443        
444        for (line_num, line) in content.lines().enumerate() {
445            if let Some(captures) = pattern.pattern.captures(line) {
446                let evidence = if captures.len() > 1 {
447                    captures.get(1).map(|m| self.mask_secret(m.as_str()))
448                } else {
449                    Some(line.trim().to_string())
450                };
451                
452                findings.push(SecurityFinding {
453                    id: format!("framework-{}-{}", pattern.framework.to_lowercase(), line_num),
454                    title: format!("{} Security Issue", pattern.framework),
455                    description: pattern.description.clone(),
456                    severity: pattern.severity.clone(),
457                    category: SecurityCategory::SecretsExposure,
458                    file_path: Some(file_path.to_path_buf()),
459                    line_number: Some(line_num + 1),
460                    column_number: None,
461                    evidence,
462                    remediation: self.generate_framework_remediation(&pattern.framework),
463                    references: vec![
464                        format!("https://docs.djangoproject.com/en/stable/topics/security/"),
465                        "https://owasp.org/www-project-top-ten/".to_string(),
466                    ],
467                    cwe_id: Some("CWE-200".to_string()),
468                    compliance_frameworks: vec!["SOC2".to_string()],
469                });
470            }
471        }
472        
473        Ok(findings)
474    }
475    
476    /// Initialize Python-specific secret patterns
477    fn initialize_python_patterns() -> Result<Vec<PythonSecretPattern>, SecurityError> {
478        let patterns = vec![
479            // Django SECRET_KEY pattern
480            PythonSecretPattern {
481                id: "python-django-secret-key".to_string(),
482                name: "Django SECRET_KEY".to_string(),
483                pattern: Regex::new(r#"(?i)SECRET_KEY\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{40,})["']"#)?,
484                severity: SecuritySeverity::Critical,
485                description: "Django SECRET_KEY found in source code".to_string(),
486                context_indicators: vec!["django".to_string(), "settings".to_string(), "SECRET_KEY".to_string()],
487                false_positive_indicators: vec!["example".to_string(), "your-secret-key".to_string(), "fake".to_string()],
488                remediation_hints: vec![
489                    "Move SECRET_KEY to environment variables".to_string(),
490                    "Use python-decouple or similar library".to_string(),
491                    "Never commit SECRET_KEY to version control".to_string(),
492                ],
493            },
494            
495            // Flask SECRET_KEY pattern
496            PythonSecretPattern {
497                id: "python-flask-secret-key".to_string(),
498                name: "Flask SECRET_KEY".to_string(),
499                pattern: Regex::new(r#"(?i)app\.secret_key\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{20,})["']"#)?,
500                severity: SecuritySeverity::High,
501                description: "Flask SECRET_KEY hardcoded in application".to_string(),
502                context_indicators: vec!["flask".to_string(), "app".to_string(), "secret_key".to_string()],
503                false_positive_indicators: vec!["example".to_string(), "your-secret".to_string()],
504                remediation_hints: vec![
505                    "Use os.environ.get('SECRET_KEY')".to_string(),
506                    "Store in environment variables".to_string(),
507                ],
508            },
509            
510            // FastAPI JWT secret
511            PythonSecretPattern {
512                id: "python-fastapi-jwt-secret".to_string(),
513                name: "FastAPI JWT Secret".to_string(),
514                pattern: Regex::new(r#"(?i)(?:jwt_secret|jwt_key|secret_key)\s*=\s*["']([A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?/~`]{20,})["']"#)?,
515                severity: SecuritySeverity::High,
516                description: "FastAPI JWT secret hardcoded in source".to_string(),
517                context_indicators: vec!["fastapi".to_string(), "jwt".to_string(), "token".to_string()],
518                false_positive_indicators: vec!["example".to_string(), "test".to_string()],
519                remediation_hints: vec![
520                    "Use Pydantic Settings for configuration".to_string(),
521                    "Store JWT secrets in environment variables".to_string(),
522                ],
523            },
524            
525            // Database connection strings
526            PythonSecretPattern {
527                id: "python-database-url".to_string(),
528                name: "Database Connection String".to_string(),
529                pattern: Regex::new(r#"(?i)(?:database_url|db_url|sqlalchemy_database_uri)\s*=\s*["'](?:postgresql|mysql|sqlite|mongodb)://[^"']*:[^"']*@[^"']+["']"#)?,
530                severity: SecuritySeverity::Critical,
531                description: "Database connection string with credentials detected".to_string(),
532                context_indicators: vec!["database".to_string(), "sqlalchemy".to_string(), "connect".to_string()],
533                false_positive_indicators: vec!["localhost".to_string(), "example.com".to_string(), "user:pass".to_string()],
534                remediation_hints: vec![
535                    "Use environment variables for database credentials".to_string(),
536                    "Consider using connection pooling and secrets management".to_string(),
537                ],
538            },
539            
540            // Generic API key pattern
541            PythonSecretPattern {
542                id: "python-api-key-assignment".to_string(),
543                name: "API Key Assignment".to_string(),
544                pattern: Regex::new(r#"(?i)(?:api_key|apikey|access_key|secret_key|private_key|auth_token|bearer_token)\s*=\s*["']([A-Za-z0-9_-]{20,})["']"#)?,
545                severity: SecuritySeverity::High,
546                description: "API key hardcoded in variable assignment".to_string(),
547                context_indicators: vec!["requests".to_string(), "api".to_string(), "client".to_string()],
548                false_positive_indicators: vec!["os.environ".to_string(), "config".to_string(), "settings".to_string()],
549                remediation_hints: vec![
550                    "Use environment variables or config files".to_string(),
551                    "Consider using secrets management services".to_string(),
552                ],
553            },
554        ];
555        
556        Ok(patterns)
557    }
558    
559    /// Initialize AI/ML service patterns
560    fn initialize_ai_ml_patterns() -> Result<Vec<AiMlPattern>, SecurityError> {
561        let patterns = vec![
562            // OpenAI API keys
563            AiMlPattern {
564                service: "OpenAI".to_string(),
565                pattern: Regex::new(r#"(?i)(?:openai[_-]?api[_-]?key|openai[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{32,})["']?"#)?,
566                severity: SecuritySeverity::Critical,
567                description: "OpenAI API key detected".to_string(),
568                api_key_format: "sk-[32+ alphanumeric characters]".to_string(),
569            },
570            
571            // OpenAI Organization ID
572            AiMlPattern {
573                service: "OpenAI Organization".to_string(),
574                pattern: Regex::new(r#"(?i)(?:openai[_-]?org[_-]?id|openai[_-]?organization)\s*[=:]\s*["']?(org-[A-Za-z0-9]{20,})["']?"#)?,
575                severity: SecuritySeverity::Medium,
576                description: "OpenAI organization ID detected".to_string(),
577                api_key_format: "org-[20+ alphanumeric characters]".to_string(),
578            },
579            
580            // Anthropic Claude API keys
581            AiMlPattern {
582                service: "Anthropic Claude".to_string(),
583                pattern: Regex::new(r#"(?i)(?:anthropic[_-]?api[_-]?key|claude[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-ant-[A-Za-z0-9]{40,})["']?"#)?,
584                severity: SecuritySeverity::Critical,
585                description: "Anthropic Claude API key detected".to_string(),
586                api_key_format: "sk-ant-[40+ alphanumeric characters]".to_string(),
587            },
588            
589            // Hugging Face API tokens
590            AiMlPattern {
591                service: "Hugging Face".to_string(),
592                pattern: Regex::new(r#"(?i)(?:huggingface[_-]?api[_-]?key|huggingface[_-]?token|hf[_-]?token)\s*[=:]\s*["']?(hf_[A-Za-z0-9]{30,})["']?"#)?,
593                severity: SecuritySeverity::High,
594                description: "Hugging Face API token detected".to_string(),
595                api_key_format: "hf_[30+ alphanumeric characters]".to_string(),
596            },
597            
598            // Google AI/Gemini API keys
599            AiMlPattern {
600                service: "Google AI/Gemini".to_string(),
601                pattern: Regex::new(r#"(?i)(?:google[_-]?ai[_-]?api[_-]?key|gemini[_-]?api[_-]?key)\s*[=:]\s*["']?(AIza[A-Za-z0-9_-]{35,})["']?"#)?,
602                severity: SecuritySeverity::Critical,
603                description: "Google AI/Gemini API key detected".to_string(),
604                api_key_format: "AIza[35+ alphanumeric characters with underscores/dashes]".to_string(),
605            },
606            
607            // Cohere API keys
608            AiMlPattern {
609                service: "Cohere".to_string(),
610                pattern: Regex::new(r#"(?i)(?:cohere[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?,
611                severity: SecuritySeverity::High,
612                description: "Cohere API key detected".to_string(),
613                api_key_format: "[40+ alphanumeric characters]".to_string(),
614            },
615            
616            // Replicate API tokens
617            AiMlPattern {
618                service: "Replicate".to_string(),
619                pattern: Regex::new(r#"(?i)(?:replicate[_-]?api[_-]?token|replicate[_-]?token)\s*[=:]\s*["']?(r8_[A-Za-z0-9]{30,})["']?"#)?,
620                severity: SecuritySeverity::High,
621                description: "Replicate API token detected".to_string(),
622                api_key_format: "r8_[30+ alphanumeric characters]".to_string(),
623            },
624            
625            // Stability AI API keys
626            AiMlPattern {
627                service: "Stability AI".to_string(),
628                pattern: Regex::new(r#"(?i)(?:stability[_-]?ai[_-]?api[_-]?key|stable[_-]?diffusion[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{40,})["']?"#)?,
629                severity: SecuritySeverity::High,
630                description: "Stability AI API key detected".to_string(),
631                api_key_format: "sk-[40+ alphanumeric characters]".to_string(),
632            },
633            
634            // DeepSeek API keys
635            AiMlPattern {
636                service: "DeepSeek".to_string(),
637                pattern: Regex::new(r#"(?i)(?:deepseek[_-]?api[_-]?key)\s*[=:]\s*["']?(sk-[A-Za-z0-9]{32,})["']?"#)?,
638                severity: SecuritySeverity::High,
639                description: "DeepSeek API key detected".to_string(),
640                api_key_format: "sk-[32+ alphanumeric characters]".to_string(),
641            },
642            
643            // Mistral AI API keys
644            AiMlPattern {
645                service: "Mistral AI".to_string(),
646                pattern: Regex::new(r#"(?i)(?:mistral[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{32,})["']?"#)?,
647                severity: SecuritySeverity::High,
648                description: "Mistral AI API key detected".to_string(),
649                api_key_format: "[32+ alphanumeric characters]".to_string(),
650            },
651            
652            // Together AI API keys
653            AiMlPattern {
654                service: "Together AI".to_string(),
655                pattern: Regex::new(r#"(?i)(?:together[_-]?ai[_-]?api[_-]?key|together[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?,
656                severity: SecuritySeverity::High,
657                description: "Together AI API key detected".to_string(),
658                api_key_format: "[40+ alphanumeric characters]".to_string(),
659            },
660            
661            // Weights & Biases API keys
662            AiMlPattern {
663                service: "Weights & Biases".to_string(),
664                pattern: Regex::new(r#"(?i)(?:wandb[_-]?api[_-]?key|wandb[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9]{40,})["']?"#)?,
665                severity: SecuritySeverity::Medium,
666                description: "Weights & Biases API key detected".to_string(),
667                api_key_format: "[40+ alphanumeric characters]".to_string(),
668            },
669            
670            // MLflow tracking server credentials
671            AiMlPattern {
672                service: "MLflow".to_string(),
673                pattern: Regex::new(r#"(?i)(?:mlflow[_-]?tracking[_-]?username|mlflow[_-]?tracking[_-]?password)\s*[=:]\s*["']?([A-Za-z0-9]{8,})["']?"#)?,
674                severity: SecuritySeverity::Medium,
675                description: "MLflow tracking credentials detected".to_string(),
676                api_key_format: "[8+ alphanumeric characters]".to_string(),
677            },
678        ];
679        
680        Ok(patterns)
681    }
682    
683    /// Initialize cloud service patterns
684    fn initialize_cloud_patterns() -> Result<Vec<CloudPattern>, SecurityError> {
685        let patterns = vec![
686            // AWS Access Keys
687            CloudPattern {
688                provider: "AWS".to_string(),
689                service: "IAM Access Key".to_string(),
690                pattern: Regex::new(r#"(?i)(?:aws[_-]?access[_-]?key[_-]?id)\s*[=:]\s*["']?(AKIA[A-Z0-9]{16})["']?"#)?,
691                severity: SecuritySeverity::Critical,
692                description: "AWS Access Key ID detected".to_string(),
693            },
694            
695            // AWS Secret Access Keys
696            CloudPattern {
697                provider: "AWS".to_string(),
698                service: "IAM Secret Key".to_string(),
699                pattern: Regex::new(r#"(?i)(?:aws[_-]?secret[_-]?access[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{40})["']?"#)?,
700                severity: SecuritySeverity::Critical,
701                description: "AWS Secret Access Key detected".to_string(),
702            },
703            
704            // AWS Session Tokens
705            CloudPattern {
706                provider: "AWS".to_string(),
707                service: "Session Token".to_string(),
708                pattern: Regex::new(r#"(?i)(?:aws[_-]?session[_-]?token)\s*[=:]\s*["']?([A-Za-z0-9/+=]{100,})["']?"#)?,
709                severity: SecuritySeverity::High,
710                description: "AWS Session Token detected".to_string(),
711            },
712            
713            // Google Cloud Service Account Keys
714            CloudPattern {
715                provider: "GCP".to_string(),
716                service: "Service Account Key".to_string(),
717                pattern: Regex::new(r#"(?i)(?:google[_-]?application[_-]?credentials|gcp[_-]?service[_-]?account)\s*[=:]\s*["']?([A-Za-z0-9/+=]{50,})["']?"#)?,
718                severity: SecuritySeverity::Critical,
719                description: "Google Cloud Service Account key detected".to_string(),
720            },
721            
722            // Azure Storage Account Keys
723            CloudPattern {
724                provider: "Azure".to_string(),
725                service: "Storage Account Key".to_string(),
726                pattern: Regex::new(r#"(?i)(?:azure[_-]?storage[_-]?account[_-]?key|azure[_-]?storage[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{88})["']?"#)?,
727                severity: SecuritySeverity::Critical,
728                description: "Azure Storage Account key detected".to_string(),
729            },
730            
731            // Azure Service Principal
732            CloudPattern {
733                provider: "Azure".to_string(),
734                service: "Service Principal".to_string(),
735                pattern: Regex::new(r#"(?i)(?:azure[_-]?client[_-]?secret|azure[_-]?tenant[_-]?id)\s*[=:]\s*["']?([A-Za-z0-9-]{32,})["']?"#)?,
736                severity: SecuritySeverity::Critical,
737                description: "Azure Service Principal credentials detected".to_string(),
738            },
739            
740            // DigitalOcean API tokens
741            CloudPattern {
742                provider: "DigitalOcean".to_string(),
743                service: "API Token".to_string(),
744                pattern: Regex::new(r#"(?i)(?:digitalocean[_-]?api[_-]?token|do[_-]?api[_-]?token)\s*[=:]\s*["']?(dop_v1_[A-Za-z0-9]{64})["']?"#)?,
745                severity: SecuritySeverity::High,
746                description: "DigitalOcean API token detected".to_string(),
747            },
748            
749            // Heroku API keys
750            CloudPattern {
751                provider: "Heroku".to_string(),
752                service: "API Key".to_string(),
753                pattern: Regex::new(r#"(?i)(?:heroku[_-]?api[_-]?key)\s*[=:]\s*["']?([A-Za-z0-9-]{36})["']?"#)?,
754                severity: SecuritySeverity::High,
755                description: "Heroku API key detected".to_string(),
756            },
757            
758            // Stripe API keys
759            CloudPattern {
760                provider: "Stripe".to_string(),
761                service: "API Key".to_string(),
762                pattern: Regex::new(r#"(?i)(?:stripe[_-]?api[_-]?key|stripe[_-]?secret[_-]?key)\s*[=:]\s*["']?(sk_live_[A-Za-z0-9]{24}|sk_test_[A-Za-z0-9]{24})["']?"#)?,
763                severity: SecuritySeverity::Critical,
764                description: "Stripe API key detected".to_string(),
765            },
766            
767            // Twilio credentials
768            CloudPattern {
769                provider: "Twilio".to_string(),
770                service: "Auth Token".to_string(),
771                pattern: Regex::new(r#"(?i)(?:twilio[_-]?auth[_-]?token|twilio[_-]?account[_-]?sid)\s*[=:]\s*["']?([A-Za-z0-9]{32,34})["']?"#)?,
772                severity: SecuritySeverity::High,
773                description: "Twilio credentials detected".to_string(),
774            },
775        ];
776        
777        Ok(patterns)
778    }
779    
780    /// Initialize framework-specific patterns
781    fn initialize_framework_patterns() -> Result<HashMap<String, Vec<FrameworkPattern>>, SecurityError> {
782        let mut frameworks = HashMap::new();
783        
784        // Django patterns
785        frameworks.insert("django".to_string(), vec![
786            FrameworkPattern {
787                framework: "Django".to_string(),
788                pattern: Regex::new(r#"(?i)(?:database|databases)\s*=\s*\{[^}]*['"']password['"']\s*:\s*['"']([^'"']+)['"'][^}]*\}"#)?,
789                severity: SecuritySeverity::Critical,
790                description: "Django database password in settings".to_string(),
791                file_extensions: vec!["py".to_string()],
792            },
793            FrameworkPattern {
794                framework: "Django".to_string(),
795                pattern: Regex::new(r#"(?i)email[_-]?host[_-]?password\s*=\s*["']([^"']+)["']"#)?,
796                severity: SecuritySeverity::High,
797                description: "Django email password in settings".to_string(),
798                file_extensions: vec!["py".to_string()],
799            },
800        ]);
801        
802        // Flask patterns
803        frameworks.insert("flask".to_string(), vec![
804            FrameworkPattern {
805                framework: "Flask".to_string(),
806                pattern: Regex::new(r#"(?i)app\.config\[['"']([A-Z_]*(?:SECRET|KEY|PASSWORD|TOKEN)[A-Z_]*)['"']\]\s*=\s*["']([^"']+)["']"#)?,
807                severity: SecuritySeverity::High,
808                description: "Flask configuration with potential secret".to_string(),
809                file_extensions: vec!["py".to_string()],
810            },
811        ]);
812        
813        // FastAPI patterns
814        frameworks.insert("fastapi".to_string(), vec![
815            FrameworkPattern {
816                framework: "FastAPI".to_string(),
817                pattern: Regex::new(r#"(?i)class\s+Settings\([^)]*\):[^}]*([A-Z_]*(?:SECRET|KEY|PASSWORD|TOKEN)[A-Z_]*)\s*:\s*str\s*=\s*["']([^"']+)["']"#)?,
818                severity: SecuritySeverity::High,
819                description: "FastAPI Settings class with hardcoded secret".to_string(),
820                file_extensions: vec!["py".to_string()],
821            },
822        ]);
823        
824        Ok(frameworks)
825    }
826    
827    /// Initialize database patterns
828    fn initialize_database_patterns() -> Result<Vec<DatabasePattern>, SecurityError> {
829        let patterns = vec![
830            // PostgreSQL connection strings
831            DatabasePattern {
832                database_type: "PostgreSQL".to_string(),
833                pattern: Regex::new(r#"(?i)postgresql://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?,
834                severity: SecuritySeverity::Critical,
835                description: "PostgreSQL connection string with credentials".to_string(),
836            },
837            
838            // MySQL connection strings
839            DatabasePattern {
840                database_type: "MySQL".to_string(),
841                pattern: Regex::new(r#"(?i)mysql://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?,
842                severity: SecuritySeverity::Critical,
843                description: "MySQL connection string with credentials".to_string(),
844            },
845            
846            // MongoDB connection strings
847            DatabasePattern {
848                database_type: "MongoDB".to_string(),
849                pattern: Regex::new(r#"(?i)mongodb://[^:]+:[^@]+@[^/]+/[^"'\s]+"#)?,
850                severity: SecuritySeverity::Critical,
851                description: "MongoDB connection string with credentials".to_string(),
852            },
853            
854            // Redis connection strings
855            DatabasePattern {
856                database_type: "Redis".to_string(),
857                pattern: Regex::new(r#"(?i)redis://[^:]*:[^@]+@[^/]+/[^"'\s]*"#)?,
858                severity: SecuritySeverity::High,
859                description: "Redis connection string with password".to_string(),
860            },
861            
862            // SQLAlchemy database URLs
863            DatabasePattern {
864                database_type: "SQLAlchemy".to_string(),
865                pattern: Regex::new(r#"(?i)sqlalchemy_database_uri\s*=\s*["'][^"']*://[^:]+:[^@]+@[^"']+"#)?,
866                severity: SecuritySeverity::Critical,
867                description: "SQLAlchemy database URI with credentials".to_string(),
868            },
869        ];
870        
871        Ok(patterns)
872    }
873    
874    /// Initialize environment variable patterns specific to Python
875    fn initialize_env_var_patterns() -> Result<Vec<EnvVarPattern>, SecurityError> {
876        let patterns = vec![
877            EnvVarPattern {
878                pattern: Regex::new(r#"os\.environ(?:\.get)?\(['"']([A-Z_]+)['"']\)"#)?,
879                severity: SecuritySeverity::Info,
880                description: "Environment variable usage detected".to_string(),
881                sensitive_prefixes: vec![
882                    "SECRET".to_string(),
883                    "KEY".to_string(),
884                    "PASSWORD".to_string(),
885                    "TOKEN".to_string(),
886                    "API".to_string(),
887                    "AUTH".to_string(),
888                    "PRIVATE".to_string(),
889                    "CREDENTIAL".to_string(),
890                ],
891            },
892            EnvVarPattern {
893                pattern: Regex::new(r#"getenv\(['"']([A-Z_]+)['"']\)"#)?,
894                severity: SecuritySeverity::Info,
895                description: "Environment variable access via getenv".to_string(),
896                sensitive_prefixes: vec![
897                    "SECRET".to_string(),
898                    "KEY".to_string(),
899                    "PASSWORD".to_string(),
900                    "TOKEN".to_string(),
901                ],
902            },
903        ];
904        
905        Ok(patterns)
906    }
907    
908    /// Check environment variable usage patterns
909    fn check_env_var_usage(&self, content: &str, file_path: &Path) -> Result<Vec<SecurityFinding>, SecurityError> {
910        let mut findings = Vec::new();
911        
912        for pattern in &self.env_var_patterns {
913            for (line_num, line) in content.lines().enumerate() {
914                if let Some(captures) = pattern.pattern.captures(line) {
915                    if let Some(var_name) = captures.get(1) {
916                        let var_name = var_name.as_str();
917                        
918                        // Check if this appears to be a sensitive variable
919                        let is_sensitive = pattern.sensitive_prefixes.iter().any(|prefix| {
920                            var_name.to_uppercase().contains(prefix)
921                        });
922                        
923                        if is_sensitive {
924                            // Check if this is properly protected (not hardcoded)
925                            if !line.contains("=") || line.contains("os.environ") || line.contains("getenv") {
926                                // This is good practice - environment variable usage
927                                continue;
928                            }
929                            
930                            let column_number = captures.get(0).map(|m| m.start() + 1);
931                            
932                            findings.push(SecurityFinding {
933                                id: format!("env-var-misuse-{}", line_num),
934                                title: "Potential Environment Variable Misuse".to_string(),
935                                description: format!("Sensitive environment variable '{}' usage detected", var_name),
936                                severity: SecuritySeverity::Medium,
937                                category: SecurityCategory::SecretsExposure,
938                                file_path: Some(file_path.to_path_buf()),
939                                line_number: Some(line_num + 1),
940                                column_number,
941                                evidence: Some(line.trim().to_string()),
942                                remediation: vec![
943                                    "Ensure sensitive environment variables are properly protected".to_string(),
944                                    "Use python-decouple or similar libraries for configuration".to_string(),
945                                    "Document required environment variables".to_string(),
946                                ],
947                                references: vec![
948                                    "https://12factor.net/config".to_string(),
949                                    "https://docs.python.org/3/library/os.html#os.environ".to_string(),
950                                ],
951                                cwe_id: Some("CWE-200".to_string()),
952                                compliance_frameworks: vec!["SOC2".to_string()],
953                            });
954                        }
955                    }
956                }
957            }
958        }
959        
960        Ok(findings)
961    }
962    
963    /// Check for insecure Python practices
964    fn check_insecure_python_practices(&self, content: &str, file_path: &Path) -> Result<Vec<SecurityFinding>, SecurityError> {
965        let mut findings = Vec::new();
966        
967        // Check for eval() usage
968        if let Ok(eval_pattern) = Regex::new(r#"eval\s*\("#) {
969            for (line_num, line) in content.lines().enumerate() {
970                if eval_pattern.is_match(line) {
971                    findings.push(SecurityFinding {
972                        id: format!("insecure-eval-{}", line_num),
973                        title: "Dangerous eval() Usage".to_string(),
974                        description: "Use of eval() function detected - potential code injection risk".to_string(),
975                        severity: SecuritySeverity::High,
976                        category: SecurityCategory::CodeInjection,
977                        file_path: Some(file_path.to_path_buf()),
978                        line_number: Some(line_num + 1),
979                        column_number: None,
980                        evidence: Some(line.trim().to_string()),
981                        remediation: vec![
982                            "Avoid using eval() with user input".to_string(),
983                            "Use ast.literal_eval() for safe evaluation of literals".to_string(),
984                            "Consider using json.loads() for JSON data".to_string(),
985                        ],
986                        references: vec![
987                            "https://owasp.org/www-project-top-ten/2021/A03_2021-Injection/".to_string(),
988                        ],
989                        cwe_id: Some("CWE-95".to_string()),
990                        compliance_frameworks: vec!["SOC2".to_string()],
991                    });
992                }
993            }
994        }
995        
996        // Check for shell injection via subprocess
997        if let Ok(subprocess_pattern) = Regex::new(r#"subprocess\.(call|run|Popen)\([^)]*shell\s*=\s*True"#) {
998            for (line_num, line) in content.lines().enumerate() {
999                if subprocess_pattern.is_match(line) {
1000                    findings.push(SecurityFinding {
1001                        id: format!("shell-injection-{}", line_num),
1002                        title: "Potential Shell Injection".to_string(),
1003                        description: "subprocess call with shell=True detected - potential command injection risk".to_string(),
1004                        severity: SecuritySeverity::High,
1005                        category: SecurityCategory::CommandInjection,
1006                        file_path: Some(file_path.to_path_buf()),
1007                        line_number: Some(line_num + 1),
1008                        column_number: None,
1009                        evidence: Some(line.trim().to_string()),
1010                        remediation: vec![
1011                            "Avoid using shell=True with user input".to_string(),
1012                            "Use subprocess with list arguments instead".to_string(),
1013                            "Validate and sanitize all user inputs".to_string(),
1014                        ],
1015                        references: vec![
1016                            "https://owasp.org/www-project-top-ten/2021/A03_2021-Injection/".to_string(),
1017                        ],
1018                        cwe_id: Some("CWE-78".to_string()),
1019                        compliance_frameworks: vec!["SOC2".to_string()],
1020                    });
1021                }
1022            }
1023        }
1024        
1025        Ok(findings)
1026    }
1027    
1028    /// Detect Python framework based on content
1029    fn detect_python_framework(&self, content: &str) -> Option<String> {
1030        if content.contains("django") || content.contains("Django") {
1031            Some("django".to_string())
1032        } else if content.contains("flask") || content.contains("Flask") {
1033            Some("flask".to_string())
1034        } else if content.contains("fastapi") || content.contains("FastAPI") {
1035            Some("fastapi".to_string())
1036        } else {
1037            None
1038        }
1039    }
1040    
1041    /// Mask sensitive information in evidence
1042    fn mask_secret(&self, secret: &str) -> String {
1043        if secret.len() <= 8 {
1044            "*".repeat(secret.len())
1045        } else {
1046            format!("{}***{}", &secret[..4], &secret[secret.len()-4..])
1047        }
1048    }
1049    
1050    /// Mask database connection string
1051    fn mask_database_connection(&self, connection_str: &str) -> String {
1052        // Replace password in connection string with asterisks
1053        if let Ok(re) = Regex::new(r"://([^:]+):([^@]+)@") {
1054            re.replace(connection_str, "://$1:***@").to_string()
1055        } else {
1056            connection_str.to_string()
1057        }
1058    }
1059    
1060    /// Calculate confidence score based on context indicators
1061    fn calculate_context_confidence(&self, content: &str, indicators: &[String]) -> f32 {
1062        let total_indicators = indicators.len() as f32;
1063        if total_indicators == 0.0 {
1064            return 0.5; // Neutral confidence
1065        }
1066        
1067        let found_indicators = indicators.iter()
1068            .filter(|indicator| content.to_lowercase().contains(&indicator.to_lowercase()))
1069            .count() as f32;
1070        
1071        found_indicators / total_indicators
1072    }
1073    
1074    /// Adjust severity based on context confidence
1075    fn adjust_severity_by_context(&self, base_severity: SecuritySeverity, confidence: f32) -> SecuritySeverity {
1076        match base_severity {
1077            SecuritySeverity::Critical => base_severity, // Keep critical as-is
1078            SecuritySeverity::High => {
1079                if confidence < 0.3 {
1080                    SecuritySeverity::Medium
1081                } else {
1082                    base_severity
1083                }
1084            }
1085            SecuritySeverity::Medium => {
1086                if confidence > 0.7 {
1087                    SecuritySeverity::High
1088                } else if confidence < 0.3 {
1089                    SecuritySeverity::Low
1090                } else {
1091                    base_severity
1092                }
1093            }
1094            _ => base_severity,
1095        }
1096    }
1097    
1098    /// Generate framework-specific remediation advice
1099    fn generate_framework_remediation(&self, framework: &str) -> Vec<String> {
1100        match framework.to_lowercase().as_str() {
1101            "django" => vec![
1102                "Use Django's built-in security features".to_string(),
1103                "Store SECRET_KEY in environment variables".to_string(),
1104                "Use django-environ for configuration management".to_string(),
1105                "Enable Django's security middleware".to_string(),
1106            ],
1107            "flask" => vec![
1108                "Use Flask-Security for authentication".to_string(),
1109                "Store secrets in environment variables".to_string(),
1110                "Use Flask-Talisman for security headers".to_string(),
1111                "Implement proper session management".to_string(),
1112            ],
1113            "fastapi" => vec![
1114                "Use Pydantic Settings for configuration".to_string(),
1115                "Implement proper JWT token management".to_string(),
1116                "Use dependency injection for secrets".to_string(),
1117                "Enable HTTPS and security headers".to_string(),
1118            ],
1119            _ => vec![
1120                "Follow framework-specific security best practices".to_string(),
1121                "Use environment variables for sensitive data".to_string(),
1122            ],
1123        }
1124    }
1125    
1126    /// Enhance a security finding with gitignore risk assessment
1127    fn enhance_finding_with_gitignore_status(
1128        &self,
1129        finding: &mut SecurityFinding,
1130        gitignore_status: &super::gitignore::GitIgnoreStatus,
1131    ) {
1132        // Adjust severity based on gitignore risk
1133        finding.severity = match gitignore_status.risk_level {
1134            GitIgnoreRisk::Tracked => SecuritySeverity::Critical, // Always critical if tracked
1135            GitIgnoreRisk::Exposed => {
1136                // Upgrade severity if exposed
1137                match &finding.severity {
1138                    SecuritySeverity::Medium => SecuritySeverity::High,
1139                    SecuritySeverity::Low => SecuritySeverity::Medium,
1140                    other => other.clone(),
1141                }
1142            }
1143            GitIgnoreRisk::Protected => {
1144                // Downgrade slightly if protected
1145                match &finding.severity {
1146                    SecuritySeverity::Critical => SecuritySeverity::High,
1147                    SecuritySeverity::High => SecuritySeverity::Medium,
1148                    other => other.clone(),
1149                }
1150            }
1151            GitIgnoreRisk::Safe => finding.severity.clone(),
1152        };
1153        
1154        // Add gitignore context to description
1155        finding.description.push_str(&format!(" (GitIgnore: {})", gitignore_status.description()));
1156        
1157        // Add git history warning for tracked files
1158        if gitignore_status.risk_level == GitIgnoreRisk::Tracked {
1159            finding.remediation.insert(0, "⚠️ CRITICAL: Remove this file from git history using git-filter-branch or BFG Repo-Cleaner".to_string());
1160            finding.remediation.insert(1, "🔑 Rotate any exposed secrets immediately".to_string());
1161        }
1162    }
1163    
1164    /// Analyze Python configuration files with gitignore awareness
1165    fn analyze_config_files_with_gitignore(
1166        &self,
1167        project_root: &Path,
1168        gitignore_analyzer: &mut GitIgnoreAnalyzer,
1169    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1170        let mut findings = Vec::new();
1171        
1172        // Python configuration files to check
1173        let config_files = [
1174            "settings.py",      // Django settings
1175            "config.py",        // Flask/general config
1176            "main.py",          // FastAPI main
1177            "app.py",           // Flask app
1178            "manage.py",        // Django management
1179            "wsgi.py",          // WSGI config
1180            "asgi.py",          // ASGI config
1181        ];
1182        
1183        for config_file in &config_files {
1184            let config_path = project_root.join(config_file);
1185            if config_path.exists() {
1186                let gitignore_status = gitignore_analyzer.analyze_file(&config_path);
1187                
1188                if let Ok(content) = fs::read_to_string(&config_path) {
1189                    // Basic secret pattern check for config files
1190                    if self.contains_potential_python_secrets(&content) {
1191                        let mut finding = SecurityFinding {
1192                            id: format!("config-file-{}", config_file.replace('.', "-")),
1193                            title: "Potential Secrets in Python Configuration File".to_string(),
1194                            description: format!("Python configuration file '{}' may contain secrets", config_file),
1195                            severity: SecuritySeverity::Medium,
1196                            category: SecurityCategory::SecretsExposure,
1197                            file_path: Some(config_path.clone()),
1198                            line_number: None,
1199                            column_number: None,
1200                            evidence: None,
1201                            remediation: vec![
1202                                "Review configuration file for hardcoded secrets".to_string(),
1203                                "Use environment variables for sensitive configuration".to_string(),
1204                                "Consider using python-decouple or similar libraries".to_string(),
1205                            ],
1206                            references: vec![
1207                                "https://12factor.net/config".to_string(),
1208                            ],
1209                            cwe_id: Some("CWE-200".to_string()),
1210                            compliance_frameworks: vec!["SOC2".to_string()],
1211                        };
1212                        
1213                        self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status);
1214                        findings.push(finding);
1215                    }
1216                }
1217            }
1218        }
1219        
1220        Ok(findings)
1221    }
1222    
1223    /// Analyze Python dependency files with gitignore awareness
1224    fn analyze_dependency_files_with_gitignore(
1225        &self,
1226        project_root: &Path,
1227        gitignore_analyzer: &mut GitIgnoreAnalyzer,
1228    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1229        let mut findings = Vec::new();
1230        
1231        // Python dependency files to check
1232        let dependency_files = [
1233            "requirements.txt",
1234            "requirements-dev.txt",
1235            "requirements-prod.txt",
1236            "Pipfile",
1237            "Pipfile.lock",
1238            "pyproject.toml",
1239            "poetry.lock",
1240            "conda-requirements.txt",
1241            "environment.yml",
1242        ];
1243        
1244        for dep_file in &dependency_files {
1245            let dep_path = project_root.join(dep_file);
1246            if dep_path.exists() {
1247                let gitignore_status = gitignore_analyzer.analyze_file(&dep_path);
1248                
1249                // Generally, dependency files should be tracked, but check for any embedded secrets
1250                if let Ok(content) = fs::read_to_string(&dep_path) {
1251                    if self.contains_potential_python_secrets(&content) {
1252                        let mut finding = SecurityFinding {
1253                            id: format!("dependency-file-{}", dep_file.replace('.', "-").replace('-', "_")),
1254                            title: "Potential Secrets in Python Dependency File".to_string(),
1255                            description: format!("Python dependency file '{}' may contain secrets", dep_file),
1256                            severity: SecuritySeverity::High,
1257                            category: SecurityCategory::SecretsExposure,
1258                            file_path: Some(dep_path.clone()),
1259                            line_number: None,
1260                            column_number: None,
1261                            evidence: None,
1262                            remediation: vec![
1263                                "Remove any secrets from dependency files".to_string(),
1264                                "Use environment variables for configuration".to_string(),
1265                                "Review dependency sources for security".to_string(),
1266                            ],
1267                            references: vec![
1268                                "https://pip.pypa.io/en/stable/topics/secure-installs/".to_string(),
1269                            ],
1270                            cwe_id: Some("CWE-200".to_string()),
1271                            compliance_frameworks: vec!["SOC2".to_string()],
1272                        };
1273                        
1274                        self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status);
1275                        findings.push(finding);
1276                    }
1277                }
1278            }
1279        }
1280        
1281        Ok(findings)
1282    }
1283    
1284    /// Analyze environment files with comprehensive gitignore risk assessment
1285    fn analyze_env_files_with_gitignore(
1286        &self,
1287        project_root: &Path,
1288        gitignore_analyzer: &mut GitIgnoreAnalyzer,
1289    ) -> Result<Vec<SecurityFinding>, SecurityError> {
1290        let mut findings = Vec::new();
1291        
1292        // Get all potential environment files using gitignore analyzer
1293        let env_files = gitignore_analyzer.get_files_to_analyze(&[])
1294            .map_err(|e| SecurityError::Io(e))?
1295            .into_iter()
1296            .filter(|file| {
1297                if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) {
1298                    // Exclude template/example files from security alerts
1299                    if self.is_template_file(file_name) {
1300                        debug!("Skipping template file: {}", file_name);
1301                        return false;
1302                    }
1303                    
1304                    file_name.starts_with(".env") || 
1305                    file_name.contains("credentials") || 
1306                    file_name.contains("secrets") ||
1307                    file_name.ends_with(".key") ||
1308                    file_name.ends_with(".pem") ||
1309                    file_name == "secret.json" ||
1310                    file_name == "service-account.json"
1311                } else {
1312                    false
1313                }
1314            })
1315            .collect::<Vec<_>>();
1316        
1317        for env_file in env_files {
1318            let gitignore_status = gitignore_analyzer.analyze_file(&env_file);
1319            let relative_path = env_file.strip_prefix(project_root)
1320                .unwrap_or(&env_file);
1321            
1322            // Create finding based on gitignore risk assessment
1323            let (severity, title, description) = match gitignore_status.risk_level {
1324                GitIgnoreRisk::Tracked => (
1325                    SecuritySeverity::Critical,
1326                    "Python Secret File Tracked by Git".to_string(),
1327                    format!("Python secret file '{}' is tracked by git and may expose credentials in version history", relative_path.display()),
1328                ),
1329                GitIgnoreRisk::Exposed => (
1330                    SecuritySeverity::High,
1331                    "Python Secret File Not in GitIgnore".to_string(),
1332                    format!("Python secret file '{}' exists but is not protected by .gitignore", relative_path.display()),
1333                ),
1334                GitIgnoreRisk::Protected => (
1335                    SecuritySeverity::Info,
1336                    "Python Secret File Properly Protected".to_string(),
1337                    format!("Python secret file '{}' is properly ignored but detected for verification", relative_path.display()),
1338                ),
1339                GitIgnoreRisk::Safe => continue, // Skip files that appear safe
1340            };
1341            
1342            let mut finding = SecurityFinding {
1343                id: format!("python-env-file-{}", relative_path.to_string_lossy().replace('/', "-").replace('.', "-")),
1344                title,
1345                description,
1346                severity,
1347                category: SecurityCategory::SecretsExposure,
1348                file_path: Some(env_file.clone()),
1349                line_number: None,
1350                column_number: None,
1351                evidence: None,
1352                remediation: vec![
1353                    "Ensure sensitive files are in .gitignore".to_string(),
1354                    "Use .env.example files for documentation".to_string(),
1355                    "Never commit actual environment files to version control".to_string(),
1356                    "Use python-decouple for environment variable management".to_string(),
1357                ],
1358                references: vec![
1359                    "https://github.com/motdotla/dotenv#should-i-commit-my-env-file".to_string(),
1360                    "https://pypi.org/project/python-decouple/".to_string(),
1361                ],
1362                cwe_id: Some("CWE-200".to_string()),
1363                compliance_frameworks: vec!["SOC2".to_string()],
1364            };
1365            
1366            self.enhance_finding_with_gitignore_status(&mut finding, &gitignore_status);
1367            findings.push(finding);
1368        }
1369        
1370        Ok(findings)
1371    }
1372    
1373    /// Check if a file is a template/example file that should be excluded from security alerts
1374    fn is_template_file(&self, file_name: &str) -> bool {
1375        let template_indicators = [
1376            "sample", "example", "template", "template.env", "env.template",
1377            "sample.env", "env.sample", "example.env", "env.example",
1378            "examples", "samples", "templates", "demo", "test", 
1379            ".env.sample", ".env.example", ".env.template", ".env.demo", ".env.test",
1380            "example.json", "sample.json", "template.json"
1381        ];
1382        
1383        let file_name_lower = file_name.to_lowercase();
1384        
1385        // Check for exact matches or contains patterns
1386        template_indicators.iter().any(|indicator| {
1387            file_name_lower == *indicator || 
1388            file_name_lower.contains(indicator) ||
1389            file_name_lower.ends_with(indicator)
1390        })
1391    }
1392    
1393    /// Check if content contains potential Python secrets (basic patterns)
1394    fn contains_potential_python_secrets(&self, content: &str) -> bool {
1395        let secret_indicators = [
1396            "sk_", "pk_live_", "eyJ", "AKIA", "-----BEGIN",
1397            "client_secret", "api_key", "access_token", "SECRET_KEY",
1398            "private_key", "secret_key", "bearer", "password",
1399            "token", "credentials", "auth"
1400        ];
1401        
1402        let content_lower = content.to_lowercase();
1403        secret_indicators.iter().any(|indicator| content_lower.contains(&indicator.to_lowercase()))
1404    }
1405    
1406    /// Generate Python-specific security recommendations
1407    fn generate_python_security_recommendations(&self) -> Vec<String> {
1408        vec![
1409            "🐍 Python Security Best Practices:".to_string(),
1410            "   • Use environment variables for all secrets and configuration".to_string(),
1411            "   • Install python-decouple or python-dotenv for configuration management".to_string(),
1412            "   • Keep requirements.txt and poetry.lock files up to date".to_string(),
1413            "   • Use virtual environments to isolate dependencies".to_string(),
1414            "   • Run 'pip-audit' or 'safety check' to scan for vulnerable packages".to_string(),
1415            "   • Enable Django's security middleware if using Django".to_string(),
1416            "   • Use parameterized queries to prevent SQL injection".to_string(),
1417            "   • Validate and sanitize all user inputs".to_string(),
1418            "   • Use HTTPS in production environments".to_string(),
1419            "   • Implement proper error handling and logging".to_string(),
1420            "   • Consider using tools like bandit for static security analysis".to_string(),
1421        ]
1422    }
1423}
syncable_cli/analyzer/security/python.rs

syncable_cli/analyzer/security/
python.rs