1use std::collections::HashSet;
8use std::path::{Path, PathBuf};
9use std::fs;
10use log::{info, warn};
11use regex::Regex;
12
13pub struct GitIgnoreAnalyzer {
15 patterns: Vec<GitIgnorePattern>,
16 project_root: PathBuf,
17 is_git_repo: bool,
18}
19
20#[derive(Debug, Clone)]
22pub struct GitIgnorePattern {
23 pub original: String,
24 pub regex: Regex,
25 pub is_negation: bool,
26 pub is_directory_only: bool,
27 pub is_absolute: bool, pub pattern_type: PatternType,
29}
30
/// Shape of a gitignore pattern body, decided after the leading `!`, the
/// trailing `/` and the leading `/` have been removed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PatternType {
    /// No `/`, `*` or `?` in the body and no trailing slash on the pattern.
    Exact,
    /// No `/` in the body, but at least one `*` or `?`.
    Wildcard,
    /// No `/`, `*` or `?` in the body, and the pattern ended with `/`.
    Directory,
    /// The body contains a `/`.
    Path,
}
42
43#[derive(Debug, Clone)]
45pub struct GitIgnoreStatus {
46 pub is_ignored: bool,
47 pub matched_pattern: Option<String>,
48 pub is_tracked: bool, pub should_be_ignored: bool, pub risk_level: GitIgnoreRisk,
51}
52
/// Exposure classification of an analyzed file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GitIgnoreRisk {
    /// The file does not look like it holds secrets.
    Safe,
    /// The file looks sensitive and is covered by an ignore pattern.
    Protected,
    /// The file looks sensitive and no ignore pattern covers it.
    Exposed,
    /// The file looks sensitive and git is tracking it.
    Tracked,
}
64
65impl GitIgnoreAnalyzer {
66 pub fn new(project_root: &Path) -> Result<Self, std::io::Error> {
67 let project_root = project_root.canonicalize()?;
68 let is_git_repo = project_root.join(".git").exists();
69
70 let patterns = if is_git_repo {
71 Self::parse_gitignore_files(&project_root)?
72 } else {
73 Self::create_default_patterns()
74 };
75
76 info!("Initialized GitIgnore analyzer with {} patterns for {}",
77 patterns.len(), project_root.display());
78
79 Ok(Self {
80 patterns,
81 project_root,
82 is_git_repo,
83 })
84 }
85
86 fn parse_gitignore_files(project_root: &Path) -> Result<Vec<GitIgnorePattern>, std::io::Error> {
88 let mut patterns = Vec::new();
89
90 patterns.extend(Self::create_default_patterns());
92
93 let gitignore_path = project_root.join(".gitignore");
95 if gitignore_path.exists() {
96 let content = fs::read_to_string(&gitignore_path)?;
97 patterns.extend(Self::parse_gitignore_content(&content, project_root)?);
98 info!("Parsed {} patterns from .gitignore", patterns.len());
99 }
100
101 Ok(patterns)
105 }
106
107 fn create_default_patterns() -> Vec<GitIgnorePattern> {
109 let default_patterns = [
110 ".env",
111 ".env.local",
112 ".env.*.local",
113 ".env.production",
114 ".env.development",
115 ".env.staging",
116 ".env.test",
117 "*.pem",
118 "*.key",
119 "*.p12",
120 "*.pfx",
121 "id_rsa",
122 "id_dsa",
123 "id_ecdsa",
124 "id_ed25519",
125 ".aws/credentials",
126 ".ssh/",
127 "secrets/",
128 "private/",
129 ];
130
131 default_patterns.iter()
132 .filter_map(|pattern| Self::parse_pattern(pattern, &PathBuf::from(".")).ok())
133 .collect()
134 }
135
136 fn parse_gitignore_content(content: &str, _root: &Path) -> Result<Vec<GitIgnorePattern>, std::io::Error> {
138 let mut patterns = Vec::new();
139
140 for (line_num, line) in content.lines().enumerate() {
141 let line = line.trim();
142
143 if line.is_empty() || line.starts_with('#') {
145 continue;
146 }
147
148 match Self::parse_pattern(line, &PathBuf::from(".")) {
149 Ok(pattern) => patterns.push(pattern),
150 Err(e) => {
151 warn!("Failed to parse gitignore pattern on line {}: '{}' - {}", line_num + 1, line, e);
152 }
153 }
154 }
155
156 Ok(patterns)
157 }
158
159 fn parse_pattern(pattern: &str, _root: &Path) -> Result<GitIgnorePattern, regex::Error> {
161 let original = pattern.to_string();
162 let mut pattern = pattern.to_string();
163
164 let is_negation = pattern.starts_with('!');
166 if is_negation {
167 pattern = pattern[1..].to_string();
168 }
169
170 let is_directory_only = pattern.ends_with('/');
172 if is_directory_only {
173 pattern.pop();
174 }
175
176 let is_absolute = pattern.starts_with('/');
178 if is_absolute {
179 pattern = pattern[1..].to_string();
180 }
181
182 let pattern_type = if pattern.contains('/') {
184 PatternType::Path
185 } else if pattern.contains('*') || pattern.contains('?') {
186 PatternType::Wildcard
187 } else if is_directory_only {
188 PatternType::Directory
189 } else {
190 PatternType::Exact
191 };
192
193 let regex_pattern = Self::gitignore_to_regex(&pattern, is_absolute, &pattern_type)?;
195 let regex = Regex::new(®ex_pattern)?;
196
197 Ok(GitIgnorePattern {
198 original,
199 regex,
200 is_negation,
201 is_directory_only,
202 is_absolute,
203 pattern_type,
204 })
205 }
206
207 fn gitignore_to_regex(pattern: &str, is_absolute: bool, pattern_type: &PatternType) -> Result<String, regex::Error> {
209 let mut regex = String::new();
210
211 if is_absolute {
213 regex.push_str("^");
214 } else {
215 regex.push_str("(?:^|/)");
217 }
218
219 for ch in pattern.chars() {
221 match ch {
222 '*' => {
223 if pattern.contains("**") {
225 regex.push_str(".*");
226 } else {
227 regex.push_str("[^/]*");
228 }
229 }
230 '?' => regex.push_str("[^/]"),
231 '.' => regex.push_str("\\."),
232 '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '+' | '|' | '\\' => {
233 regex.push('\\');
234 regex.push(ch);
235 }
236 '/' => regex.push_str("/"),
237 _ => regex.push(ch),
238 }
239 }
240
241 match pattern_type {
243 PatternType::Directory => {
244 regex.push_str("(?:/|$)");
245 }
246 PatternType::Exact => {
247 regex.push_str("(?:/|$)");
248 }
249 _ => {
250 regex.push_str("(?:/.*)?$");
251 }
252 }
253
254 Ok(regex)
255 }
256
257 pub fn analyze_file(&self, file_path: &Path) -> GitIgnoreStatus {
259 let relative_path = match file_path.strip_prefix(&self.project_root) {
260 Ok(rel) => rel,
261 Err(_) => return GitIgnoreStatus {
262 is_ignored: false,
263 matched_pattern: None,
264 is_tracked: false,
265 should_be_ignored: false,
266 risk_level: GitIgnoreRisk::Safe,
267 },
268 };
269
270 let path_str = relative_path.to_string_lossy();
271 let file_name = file_path.file_name()
272 .and_then(|n| n.to_str())
273 .unwrap_or("");
274
275 let mut is_ignored = false;
277 let mut matched_pattern = None;
278
279 for pattern in &self.patterns {
280 if pattern.regex.is_match(&path_str) {
281 if pattern.is_negation {
282 is_ignored = false;
283 matched_pattern = None;
284 } else {
285 is_ignored = true;
286 matched_pattern = Some(pattern.original.clone());
287 }
288 }
289 }
290
291 let is_tracked = if self.is_git_repo {
293 self.check_git_tracked(file_path)
294 } else {
295 false
296 };
297
298 let should_be_ignored = self.should_file_be_ignored(file_path, file_name);
300
301 let risk_level = self.assess_risk(is_ignored, is_tracked, should_be_ignored);
303
304 GitIgnoreStatus {
305 is_ignored,
306 matched_pattern,
307 is_tracked,
308 should_be_ignored,
309 risk_level,
310 }
311 }
312
313 fn check_git_tracked(&self, file_path: &Path) -> bool {
315 use std::process::Command;
316
317 Command::new("git")
318 .args(&["ls-files", "--error-unmatch"])
319 .arg(file_path)
320 .current_dir(&self.project_root)
321 .output()
322 .map(|output| output.status.success())
323 .unwrap_or(false)
324 }
325
326 fn should_file_be_ignored(&self, file_path: &Path, file_name: &str) -> bool {
328 let secret_indicators = [
330 ".env", ".key", ".pem", ".p12", ".pfx",
331 "id_rsa", "id_dsa", "id_ecdsa", "id_ed25519",
332 "credentials", "secrets", "private"
333 ];
334
335 let path_str = file_path.to_string_lossy().to_lowercase();
336 let file_name_lower = file_name.to_lowercase();
337
338 secret_indicators.iter().any(|indicator| {
339 file_name_lower.contains(indicator) || path_str.contains(indicator)
340 })
341 }
342
343 fn assess_risk(&self, is_ignored: bool, is_tracked: bool, should_be_ignored: bool) -> GitIgnoreRisk {
345 match (should_be_ignored, is_ignored, is_tracked) {
346 (true, true, _) => GitIgnoreRisk::Protected, (true, false, true) => GitIgnoreRisk::Tracked, (true, false, false) => GitIgnoreRisk::Exposed, (false, _, _) => GitIgnoreRisk::Safe,
352 }
353 }
354
355 pub fn get_files_to_analyze(&self, extensions: &[&str]) -> Result<Vec<PathBuf>, std::io::Error> {
357 let mut files = Vec::new();
358 self.collect_files_recursive(&self.project_root, extensions, &mut files)?;
359
360 let files_to_analyze: Vec<PathBuf> = files.into_iter()
362 .filter(|file| {
363 let status = self.analyze_file(file);
364 !status.is_ignored || status.should_be_ignored
368 })
369 .collect();
370
371 info!("Found {} files to analyze for secrets", files_to_analyze.len());
372 Ok(files_to_analyze)
373 }
374
375 fn collect_files_recursive(
377 &self,
378 dir: &Path,
379 extensions: &[&str],
380 files: &mut Vec<PathBuf>
381 ) -> Result<(), std::io::Error> {
382 for entry in fs::read_dir(dir)? {
383 let entry = entry?;
384 let path = entry.path();
385
386 if path.is_dir() {
387 if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) {
389 if matches!(dir_name, ".git" | "node_modules" | "target" | "build" | "dist" | ".next") {
390 continue;
391 }
392 }
393
394 let status = self.analyze_file(&path);
396 if !status.is_ignored {
397 self.collect_files_recursive(&path, extensions, files)?;
398 }
399 } else if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
400 if extensions.is_empty() || extensions.contains(&ext) {
401 files.push(path);
402 }
403 } else {
404 files.push(path);
406 }
407 }
408
409 Ok(())
410 }
411
412 pub fn generate_gitignore_recommendations(&self, secret_files: &[PathBuf]) -> Vec<String> {
414 let mut recommendations = Vec::new();
415 let mut patterns_to_add = HashSet::new();
416
417 for file in secret_files {
418 let status = self.analyze_file(file);
419
420 if status.risk_level == GitIgnoreRisk::Exposed || status.risk_level == GitIgnoreRisk::Tracked {
421 if let Some(file_name) = file.file_name().and_then(|n| n.to_str()) {
422 if file_name.starts_with(".env") {
424 patterns_to_add.insert(".env*".to_string());
425 } else if file_name.ends_with(".key") || file_name.ends_with(".pem") {
426 patterns_to_add.insert("*.key".to_string());
427 patterns_to_add.insert("*.pem".to_string());
428 } else {
429 patterns_to_add.insert(file_name.to_string());
430 }
431 }
432
433 if status.risk_level == GitIgnoreRisk::Tracked {
434 recommendations.push(format!(
435 "CRITICAL: '{}' contains secrets and is tracked by git! Remove from git history.",
436 file.display()
437 ));
438 }
439 }
440 }
441
442 if !patterns_to_add.is_empty() {
443 recommendations.push("Add these patterns to your .gitignore:".to_string());
444 for pattern in patterns_to_add {
445 recommendations.push(format!(" {}", pattern));
446 }
447 }
448
449 recommendations
450 }
451}
452
453impl GitIgnoreStatus {
454 pub fn description(&self) -> String {
456 match self.risk_level {
457 GitIgnoreRisk::Safe => "File appears safe".to_string(),
458 GitIgnoreRisk::Protected => format!(
459 "File contains secrets but is protected (ignored by: {})",
460 self.matched_pattern.as_deref().unwrap_or("default pattern")
461 ),
462 GitIgnoreRisk::Exposed => "File contains secrets but is NOT in .gitignore!".to_string(),
463 GitIgnoreRisk::Tracked => "CRITICAL: File contains secrets and is tracked by git!".to_string(),
464 }
465 }
466
467 pub fn recommended_action(&self) -> String {
469 match self.risk_level {
470 GitIgnoreRisk::Safe => "No action needed".to_string(),
471 GitIgnoreRisk::Protected => "Verify secrets are still necessary".to_string(),
472 GitIgnoreRisk::Exposed => "Add to .gitignore immediately".to_string(),
473 GitIgnoreRisk::Tracked => "Remove from git history and add to .gitignore".to_string(),
474 }
475 }
476}
477
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Every supported pattern shape must compile to a regex.
    #[test]
    fn test_gitignore_pattern_parsing() {
        let patterns = [
            ".env",
            "*.log",
            "/config.json",
            "secrets/",
            "!important.env",
        ];

        for pattern_str in patterns.iter() {
            let pattern = GitIgnoreAnalyzer::parse_pattern(pattern_str, Path::new("."));
            assert!(pattern.is_ok(), "Failed to parse pattern: {}", pattern_str);
        }
    }

    /// A plain name matches at the root and in subdirectories, but not as a
    /// suffix of another file name.
    #[test]
    fn test_pattern_matching() {
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitIgnoreAnalyzer::new(temp_dir.path()).unwrap();

        // A directory without `.git` falls back to the built-in patterns.
        assert!(!analyzer.is_git_repo);
        assert!(!analyzer.patterns.is_empty());

        let env_pattern = GitIgnoreAnalyzer::parse_pattern(".env", Path::new(".")).unwrap();
        assert!(env_pattern.regex.is_match(".env"));
        assert!(env_pattern.regex.is_match("subdir/.env"));
        assert!(!env_pattern.regex.is_match("not-env"));
    }

    /// A wildcard pattern without slashes matches at any directory depth.
    #[test]
    fn test_nested_directory_matching() {
        let temp_dir = TempDir::new().unwrap();
        let analyzer = GitIgnoreAnalyzer::new(temp_dir.path()).unwrap();
        assert!(!analyzer.patterns.is_empty());

        let env_pattern = GitIgnoreAnalyzer::parse_pattern(".env*", Path::new(".")).unwrap();

        let test_paths = [
            ".env",
            "secrets/.env",
            "config/production/.env.local",
            "deeply/nested/folder/.env.production",
        ];

        for path in &test_paths {
            assert!(env_pattern.regex.is_match(path), "Pattern should match: {}", path);
        }
    }
}