1use std::collections::HashSet;
23use std::fs;
24use std::path::Path;
25
/// Directory names excluded when the project root has no readable `.gitignore`.
const FALLBACK_EXCLUDES: &[&str] = &[
    "node_modules",
    ".git",
    "dist",
    "build",
    ".next",
    "target",
    "__pycache__",
    ".venv",
    "venv",
    "vendor",
    ".turbo",
    "coverage",
];

/// Extracts a plain, single-segment name from one `.gitignore` line.
///
/// Returns `None` for blank lines, comments (`#`), negations (`!`), glob
/// patterns (anything containing `*`, `?` or `[`), and entries that still
/// contain a `/` after leading and trailing slashes are stripped.
fn plain_entry_name(raw_line: &str) -> Option<&str> {
    let line = raw_line.trim();

    if line.is_empty() || line.starts_with('#') || line.starts_with('!') {
        return None;
    }

    // `*`, `?` and `[...]` are all gitignore wildcards; a line using any of
    // them is a pattern, not a literal name, so it cannot be used as-is.
    if line.contains(&['*', '?', '['][..]) {
        return None;
    }

    // `/name`, `name/` and `/name/` all reduce to `name`; anchoring to the
    // root is intentionally not preserved.
    let name = line.trim_start_matches('/').trim_end_matches('/');

    if name.is_empty() || name.contains('/') {
        return None;
    }

    Some(name)
}

/// Builds the list of names to exclude for the project rooted at `root`.
///
/// Reads `<root>/.gitignore` and keeps every plain, single-segment entry
/// (see [`plain_entry_name`]) in file order, without duplicates. `.git` and
/// `node_modules` are appended when the file does not already list them.
///
/// If the file cannot be read for any reason, a copy of
/// [`FALLBACK_EXCLUDES`] is returned instead.
pub fn parse_gitignore(root: &Path) -> Vec<String> {
    let content = match fs::read_to_string(root.join(".gitignore")) {
        Ok(content) => content,
        // Best effort: a missing or unreadable file falls back to the defaults.
        Err(_) => return FALLBACK_EXCLUDES.iter().map(|s| (*s).to_string()).collect(),
    };

    // `seen` borrows from `content`, so each name is allocated only once,
    // when it is pushed into `excludes`.
    let mut seen: HashSet<&str> = HashSet::new();
    let mut excludes: Vec<String> = Vec::new();

    for name in content.lines().filter_map(plain_entry_name) {
        if seen.insert(name) {
            excludes.push(name.to_string());
        }
    }

    // These two are always excluded, whatever the file says.
    for must_have in &[".git", "node_modules"] {
        if seen.insert(*must_have) {
            excludes.push((*must_have).to_string());
        }
    }

    excludes
}
100
/// Reports whether `path` should be skipped because of `excludes`.
///
/// Returns `true` as soon as any single component of `path` is exactly equal
/// to one of the entries in `excludes`; partial matches (such as a prefix of
/// a component) do not count. Components are compared after a lossy UTF-8
/// conversion.
pub fn should_skip_path(path: &Path, excludes: &[String]) -> bool {
    path.components()
        .map(|part| part.as_os_str().to_string_lossy())
        .any(|part_name| excludes.iter().any(|excluded| part_name == *excluded))
}
112
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Writes `gitignore` into a fresh temporary directory and parses it.
    fn excludes_for(gitignore: &str) -> Vec<String> {
        let project = tempdir().unwrap();
        fs::write(project.path().join(".gitignore"), gitignore).unwrap();
        parse_gitignore(project.path())
    }

    /// True when `excludes` holds an entry exactly equal to `name`.
    fn has(excludes: &[String], name: &str) -> bool {
        excludes.iter().any(|entry| entry == name)
    }

    #[test]
    fn test_parse_gitignore_basic() {
        let excludes =
            excludes_for("# comment\nnode_modules/\ndist\n\n*.log\n!important\ntarget/\n");

        // Plain directory entries are kept, and `.git` is always added.
        assert!(has(&excludes, "node_modules"));
        assert!(has(&excludes, "dist"));
        assert!(has(&excludes, "target"));
        assert!(has(&excludes, ".git"));

        // Glob patterns and negations never make it into the list.
        assert!(excludes.iter().all(|e| !e.contains('*')));
        assert!(excludes.iter().all(|e| !e.starts_with('!')));
    }

    #[test]
    fn test_parse_gitignore_missing_file() {
        // An empty directory has no `.gitignore`, so the fallback list applies.
        let project = tempdir().unwrap();
        let excludes = parse_gitignore(project.path());

        assert!(has(&excludes, "node_modules"));
        assert!(has(&excludes, ".git"));
        assert!(has(&excludes, "dist"));
    }

    #[test]
    fn test_should_skip_path() {
        let excludes = vec!["node_modules".to_string(), ".git".to_string()];

        let inside_excluded_dir = PathBuf::from("src/node_modules/foo.js");
        let git_internal = PathBuf::from(".git/config");
        let regular_source = PathBuf::from("src/main.rs");

        assert!(should_skip_path(&inside_excluded_dir, &excludes));
        assert!(should_skip_path(&git_internal, &excludes));
        assert!(!should_skip_path(&regular_source, &excludes));
    }

    #[test]
    fn test_deduplication() {
        let excludes = excludes_for("node_modules\nnode_modules/\nnode_modules\n");

        let count = excludes
            .iter()
            .filter(|entry| entry.as_str() == "node_modules")
            .count();
        assert_eq!(count, 1, "node_modules should appear exactly once");
    }
}