syncable_cli/analyzer/security/
gitignore.rs

1//! # GitIgnore-Aware Security Analysis
2//! 
3//! Comprehensive gitignore parsing and pattern matching for security analysis.
4//! This module ensures that secret detection is gitignore-aware and can properly
5//! assess whether sensitive files are appropriately protected.
6
7use std::collections::HashSet;
8use std::path::{Path, PathBuf};
9use std::fs;
10use log::{info, warn};
11use regex::Regex;
12
13/// GitIgnore pattern matcher for security analysis
14pub struct GitIgnoreAnalyzer {
15    patterns: Vec<GitIgnorePattern>,
16    project_root: PathBuf,
17    is_git_repo: bool,
18}
19
20/// A parsed gitignore pattern with matching logic
21#[derive(Debug, Clone)]
22pub struct GitIgnorePattern {
23    pub original: String,
24    pub regex: Regex,
25    pub is_negation: bool,
26    pub is_directory_only: bool,
27    pub is_absolute: bool, // Starts with /
28    pub pattern_type: PatternType,
29}
30
/// Coarse classification of a gitignore rule, derived from its text.
///
/// The enum is fieldless, so it is `Copy` and totally comparable (`Eq`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatternType {
    /// Exact filename match (e.g., ".env")
    Exact,
    /// Wildcard pattern (e.g., "*.log")
    Wildcard,
    /// Directory pattern (e.g., "node_modules/")
    Directory,
    /// Path pattern (e.g., "config/*.env")
    Path,
}
42
43/// Result of gitignore analysis for a file
44#[derive(Debug, Clone)]
45pub struct GitIgnoreStatus {
46    pub is_ignored: bool,
47    pub matched_pattern: Option<String>,
48    pub is_tracked: bool, // Whether file is tracked by git
49    pub should_be_ignored: bool, // Whether file contains secrets and should be ignored
50    pub risk_level: GitIgnoreRisk,
51}
52
/// Risk classification assigned to an analyzed file.
///
/// "Looks like a secret file" below refers to the analyzer's name/path
/// heuristic, not to an inspection of the file's contents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GitIgnoreRisk {
    /// File does not look like a secret file (its ignore status is not considered)
    Safe,
    /// File looks like a secret file and is ignored
    Protected,
    /// File looks like a secret file and is NOT ignored (high risk)
    Exposed,
    /// File looks like a secret file, is not ignored, and is tracked by git (critical risk)
    Tracked,
}
64
65impl GitIgnoreAnalyzer {
66    pub fn new(project_root: &Path) -> Result<Self, std::io::Error> {
67        let project_root = project_root.canonicalize()?;
68        let is_git_repo = project_root.join(".git").exists();
69        
70        let patterns = if is_git_repo {
71            Self::parse_gitignore_files(&project_root)?
72        } else {
73            Self::create_default_patterns()
74        };
75        
76        info!("Initialized GitIgnore analyzer with {} patterns for {}", 
77              patterns.len(), project_root.display());
78        
79        Ok(Self {
80            patterns,
81            project_root,
82            is_git_repo,
83        })
84    }
85    
86    /// Parse all relevant .gitignore files
87    fn parse_gitignore_files(project_root: &Path) -> Result<Vec<GitIgnorePattern>, std::io::Error> {
88        let mut patterns = Vec::new();
89        
90        // Global gitignore patterns for common secret files
91        patterns.extend(Self::create_default_patterns());
92        
93        // Parse project .gitignore
94        let gitignore_path = project_root.join(".gitignore");
95        if gitignore_path.exists() {
96            let content = fs::read_to_string(&gitignore_path)?;
97            patterns.extend(Self::parse_gitignore_content(&content, project_root)?);
98            info!("Parsed {} patterns from .gitignore", patterns.len());
99        }
100        
101        // TODO: Parse global gitignore (~/.gitignore_global)
102        // TODO: Parse .git/info/exclude
103        
104        Ok(patterns)
105    }
106    
107    /// Create default patterns for common secret files
108    fn create_default_patterns() -> Vec<GitIgnorePattern> {
109        let default_patterns = [
110            ".env",
111            ".env.local",
112            ".env.*.local",
113            ".env.production",
114            ".env.development", 
115            ".env.staging",
116            ".env.test",
117            "*.pem",
118            "*.key",
119            "*.p12",
120            "*.pfx",
121            "id_rsa",
122            "id_dsa",
123            "id_ecdsa",
124            "id_ed25519",
125            ".aws/credentials",
126            ".ssh/",
127            "secrets/",
128            "private/",
129        ];
130        
131        default_patterns.iter()
132            .filter_map(|pattern| Self::parse_pattern(pattern, &PathBuf::from(".")).ok())
133            .collect()
134    }
135    
136    /// Parse gitignore content into patterns
137    fn parse_gitignore_content(content: &str, _root: &Path) -> Result<Vec<GitIgnorePattern>, std::io::Error> {
138        let mut patterns = Vec::new();
139        
140        for (line_num, line) in content.lines().enumerate() {
141            let line = line.trim();
142            
143            // Skip empty lines and comments
144            if line.is_empty() || line.starts_with('#') {
145                continue;
146            }
147            
148            match Self::parse_pattern(line, &PathBuf::from(".")) {
149                Ok(pattern) => patterns.push(pattern),
150                Err(e) => {
151                    warn!("Failed to parse gitignore pattern on line {}: '{}' - {}", line_num + 1, line, e);
152                }
153            }
154        }
155        
156        Ok(patterns)
157    }
158    
159    /// Parse a single gitignore pattern
160    fn parse_pattern(pattern: &str, _root: &Path) -> Result<GitIgnorePattern, regex::Error> {
161        let original = pattern.to_string();
162        let mut pattern = pattern.to_string();
163        
164        // Handle negation
165        let is_negation = pattern.starts_with('!');
166        if is_negation {
167            pattern = pattern[1..].to_string();
168        }
169        
170        // Handle directory-only patterns
171        let is_directory_only = pattern.ends_with('/');
172        if is_directory_only {
173            pattern.pop();
174        }
175        
176        // Handle absolute patterns (starting with /)
177        let is_absolute = pattern.starts_with('/');
178        if is_absolute {
179            pattern = pattern[1..].to_string();
180        }
181        
182        // Determine pattern type
183        let pattern_type = if pattern.contains('/') {
184            PatternType::Path
185        } else if pattern.contains('*') || pattern.contains('?') {
186            PatternType::Wildcard
187        } else if is_directory_only {
188            PatternType::Directory
189        } else {
190            PatternType::Exact
191        };
192        
193        // Convert to regex
194        let regex_pattern = Self::gitignore_to_regex(&pattern, is_absolute, &pattern_type)?;
195        let regex = Regex::new(&regex_pattern)?;
196        
197        Ok(GitIgnorePattern {
198            original,
199            regex,
200            is_negation,
201            is_directory_only,
202            is_absolute,
203            pattern_type,
204        })
205    }
206    
207    /// Convert gitignore pattern to regex
208    fn gitignore_to_regex(pattern: &str, is_absolute: bool, pattern_type: &PatternType) -> Result<String, regex::Error> {
209        let mut regex = String::new();
210        
211        // Start anchor
212        if is_absolute {
213            regex.push_str("^");
214        } else {
215            // Can match anywhere in the path
216            regex.push_str("(?:^|/)");
217        }
218        
219        // Process the pattern
220        for ch in pattern.chars() {
221            match ch {
222                '*' => {
223                    // Check if this is a double star (**)
224                    if pattern.contains("**") {
225                        regex.push_str(".*");
226                    } else {
227                        regex.push_str("[^/]*");
228                    }
229                }
230                '?' => regex.push_str("[^/]"),
231                '.' => regex.push_str("\\."),
232                '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '+' | '|' | '\\' => {
233                    regex.push('\\');
234                    regex.push(ch);
235                }
236                '/' => regex.push_str("/"),
237                _ => regex.push(ch),
238            }
239        }
240        
241        // Handle directory-only patterns
242        match pattern_type {
243            PatternType::Directory => {
244                regex.push_str("(?:/|$)");
245            }
246            PatternType::Exact => {
247                regex.push_str("(?:/|$)");
248            }
249            _ => {
250                regex.push_str("(?:/.*)?$");
251            }
252        }
253        
254        Ok(regex)
255    }
256    
257    /// Check if a file path matches gitignore patterns
258    pub fn analyze_file(&self, file_path: &Path) -> GitIgnoreStatus {
259        let relative_path = match file_path.strip_prefix(&self.project_root) {
260            Ok(rel) => rel,
261            Err(_) => return GitIgnoreStatus {
262                is_ignored: false,
263                matched_pattern: None,
264                is_tracked: false,
265                should_be_ignored: false,
266                risk_level: GitIgnoreRisk::Safe,
267            },
268        };
269        
270        let path_str = relative_path.to_string_lossy();
271        let file_name = file_path.file_name()
272            .and_then(|n| n.to_str())
273            .unwrap_or("");
274        
275        // Check against patterns
276        let mut is_ignored = false;
277        let mut matched_pattern = None;
278        
279        for pattern in &self.patterns {
280            if pattern.regex.is_match(&path_str) {
281                if pattern.is_negation {
282                    is_ignored = false;
283                    matched_pattern = None;
284                } else {
285                    is_ignored = true;
286                    matched_pattern = Some(pattern.original.clone());
287                }
288            }
289        }
290        
291        // Check if file is tracked by git
292        let is_tracked = if self.is_git_repo {
293            self.check_git_tracked(file_path)
294        } else {
295            false
296        };
297        
298        // Determine if file should be ignored (contains secrets)
299        let should_be_ignored = self.should_file_be_ignored(file_path, file_name);
300        
301        // Assess risk level
302        let risk_level = self.assess_risk(is_ignored, is_tracked, should_be_ignored);
303        
304        GitIgnoreStatus {
305            is_ignored,
306            matched_pattern,
307            is_tracked,
308            should_be_ignored,
309            risk_level,
310        }
311    }
312    
313    /// Check if file is tracked by git
314    fn check_git_tracked(&self, file_path: &Path) -> bool {
315        use std::process::Command;
316        
317        Command::new("git")
318            .args(&["ls-files", "--error-unmatch"])
319            .arg(file_path)
320            .current_dir(&self.project_root)
321            .output()
322            .map(|output| output.status.success())
323            .unwrap_or(false)
324    }
325    
326    /// Check if a file should be ignored based on its name/path
327    fn should_file_be_ignored(&self, file_path: &Path, file_name: &str) -> bool {
328        // Common secret file patterns
329        let secret_indicators = [
330            ".env", ".key", ".pem", ".p12", ".pfx", 
331            "id_rsa", "id_dsa", "id_ecdsa", "id_ed25519",
332            "credentials", "secrets", "private"
333        ];
334        
335        let path_str = file_path.to_string_lossy().to_lowercase();
336        let file_name_lower = file_name.to_lowercase();
337        
338        secret_indicators.iter().any(|indicator| {
339            file_name_lower.contains(indicator) || path_str.contains(indicator)
340        })
341    }
342    
343    /// Assess the risk level for a file
344    fn assess_risk(&self, is_ignored: bool, is_tracked: bool, should_be_ignored: bool) -> GitIgnoreRisk {
345        match (should_be_ignored, is_ignored, is_tracked) {
346            // File contains secrets
347            (true, true, _) => GitIgnoreRisk::Protected,      // Ignored (good)
348            (true, false, true) => GitIgnoreRisk::Tracked,    // Not ignored AND tracked (critical)
349            (true, false, false) => GitIgnoreRisk::Exposed,   // Not ignored but not tracked (high risk)
350            // File doesn't contain secrets (or we think it doesn't)
351            (false, _, _) => GitIgnoreRisk::Safe,
352        }
353    }
354    
355    /// Get all files that should be analyzed for secrets
356    pub fn get_files_to_analyze(&self, extensions: &[&str]) -> Result<Vec<PathBuf>, std::io::Error> {
357        let mut files = Vec::new();
358        self.collect_files_recursive(&self.project_root, extensions, &mut files)?;
359        
360        // Filter files that are definitely ignored
361        let files_to_analyze: Vec<PathBuf> = files.into_iter()
362            .filter(|file| {
363                let status = self.analyze_file(file);
364                // Analyze files that are either:
365                // 1. Not ignored (need to check if they should be)
366                // 2. Ignored but we want to verify they don't contain secrets anyway
367                !status.is_ignored || status.should_be_ignored
368            })
369            .collect();
370        
371        info!("Found {} files to analyze for secrets", files_to_analyze.len());
372        Ok(files_to_analyze)
373    }
374    
375    /// Recursively collect files with given extensions
376    fn collect_files_recursive(
377        &self, 
378        dir: &Path, 
379        extensions: &[&str], 
380        files: &mut Vec<PathBuf>
381    ) -> Result<(), std::io::Error> {
382        for entry in fs::read_dir(dir)? {
383            let entry = entry?;
384            let path = entry.path();
385            
386            if path.is_dir() {
387                // Skip obviously ignored directories
388                if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) {
389                    if matches!(dir_name, ".git" | "node_modules" | "target" | "build" | "dist" | ".next") {
390                        continue;
391                    }
392                }
393                
394                // Check if directory is ignored
395                let status = self.analyze_file(&path);
396                if !status.is_ignored {
397                    self.collect_files_recursive(&path, extensions, files)?;
398                }
399            } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
400                if extensions.is_empty() || extensions.contains(&ext) {
401                    files.push(path);
402                }
403            } else {
404                // Files without extensions might still be secret files
405                files.push(path);
406            }
407        }
408        
409        Ok(())
410    }
411    
412    /// Generate recommendations for improving gitignore coverage
413    pub fn generate_gitignore_recommendations(&self, secret_files: &[PathBuf]) -> Vec<String> {
414        let mut recommendations = Vec::new();
415        let mut patterns_to_add = HashSet::new();
416        
417        for file in secret_files {
418            let status = self.analyze_file(file);
419            
420            if status.risk_level == GitIgnoreRisk::Exposed || status.risk_level == GitIgnoreRisk::Tracked {
421                if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) {
422                    // Suggest specific patterns
423                    if file_name.starts_with(".env") {
424                        patterns_to_add.insert(".env*".to_string());
425                    } else if file_name.ends_with(".key") || file_name.ends_with(".pem") {
426                        patterns_to_add.insert("*.key".to_string());
427                        patterns_to_add.insert("*.pem".to_string());
428                    } else {
429                        patterns_to_add.insert(file_name.to_string());
430                    }
431                }
432                
433                if status.risk_level == GitIgnoreRisk::Tracked {
434                    recommendations.push(format!(
435                        "CRITICAL: '{}' contains secrets and is tracked by git! Remove from git history.",
436                        file.display()
437                    ));
438                }
439            }
440        }
441        
442        if !patterns_to_add.is_empty() {
443            recommendations.push("Add these patterns to your .gitignore:".to_string());
444            for pattern in patterns_to_add {
445                recommendations.push(format!("  {}", pattern));
446            }
447        }
448        
449        recommendations
450    }
451}
452
453impl GitIgnoreStatus {
454    /// Get a human-readable description of the status
455    pub fn description(&self) -> String {
456        match self.risk_level {
457            GitIgnoreRisk::Safe => "File appears safe".to_string(),
458            GitIgnoreRisk::Protected => format!(
459                "File contains secrets but is protected (ignored by: {})", 
460                self.matched_pattern.as_deref().unwrap_or("default pattern")
461            ),
462            GitIgnoreRisk::Exposed => "File contains secrets but is NOT in .gitignore!".to_string(),
463            GitIgnoreRisk::Tracked => "CRITICAL: File contains secrets and is tracked by git!".to_string(),
464        }
465    }
466    
467    /// Get recommended action for this file
468    pub fn recommended_action(&self) -> String {
469        match self.risk_level {
470            GitIgnoreRisk::Safe => "No action needed".to_string(),
471            GitIgnoreRisk::Protected => "Verify secrets are still necessary".to_string(),
472            GitIgnoreRisk::Exposed => "Add to .gitignore immediately".to_string(),
473            GitIgnoreRisk::Tracked => "Remove from git history and add to .gitignore".to_string(),
474        }
475    }
476}
477
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Compile one rule, panicking with the rule text if it does not parse.
    fn compile(rule: &str) -> GitIgnorePattern {
        GitIgnoreAnalyzer::parse_pattern(rule, Path::new("."))
            .unwrap_or_else(|e| panic!("Failed to parse pattern: {} ({})", rule, e))
    }

    #[test]
    fn test_gitignore_pattern_parsing() {
        let patterns = [
            ".env",
            "*.log",
            "/config.json",
            "secrets/",
            "!important.env",
        ];

        for pattern_str in patterns.iter() {
            let pattern = GitIgnoreAnalyzer::parse_pattern(pattern_str, Path::new("."));
            assert!(pattern.is_ok(), "Failed to parse pattern: {}", pattern_str);
        }

        // The markers must be reflected in the parsed flags, and the rule
        // text must be preserved verbatim.
        assert!(compile("/config.json").is_absolute);
        assert!(compile("secrets/").is_directory_only);
        assert_eq!(compile("secrets/").pattern_type, PatternType::Directory);
        assert!(compile("!important.env").is_negation);
        assert_eq!(compile("!important.env").original, "!important.env");
        assert_eq!(compile("*.log").pattern_type, PatternType::Wildcard);
        assert_eq!(compile(".env").pattern_type, PatternType::Exact);
    }

    #[test]
    fn test_pattern_matching() {
        // Test exact pattern matching
        let env_pattern = compile(".env");
        assert!(env_pattern.regex.is_match(".env"));
        assert!(env_pattern.regex.is_match("subdir/.env"));
        assert!(!env_pattern.regex.is_match("not-env"));
    }

    #[test]
    fn test_nested_directory_matching() {
        // Create a pattern for .env files
        let env_pattern = compile(".env*");

        // Test various nested scenarios
        let test_paths = [
            ".env",
            "secrets/.env",
            "config/production/.env.local",
            "deeply/nested/folder/.env.production",
        ];

        for path in &test_paths {
            assert!(env_pattern.regex.is_match(path), "Pattern should match: {}", path);
        }
    }

    #[test]
    fn test_default_patterns_outside_git_repo() {
        // A fresh temporary directory has no `.git`, so only the built-in
        // default rules are loaded.
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitIgnoreAnalyzer::new(temp_dir.path()).unwrap();
        assert!(!analyzer.is_git_repo);
        assert!(!analyzer.patterns.is_empty());

        // Build the path from the canonicalized root the analyzer stores;
        // the file does not need to exist for rule matching.
        let status = analyzer.analyze_file(&analyzer.project_root.join(".env"));
        assert!(status.is_ignored);
        assert!(status.should_be_ignored);
        assert!(!status.is_tracked);
        assert_eq!(status.risk_level, GitIgnoreRisk::Protected);
    }
}
529        }
530    }
531}