Skip to main content

opendev_runtime/
gitignore.rs

1//! GitIgnore parser — filter files based on .gitignore patterns.
2//!
3//! Parses `.gitignore` files from a root directory and all subdirectories,
4//! supporting nested gitignore overrides. Always ignores common directories
5//! (`.git`, `node_modules`, `__pycache__`, etc.) regardless of patterns.
6
7use std::path::{Path, PathBuf};
8
9use tracing::debug;
10
/// Names that are ignored unconditionally, whatever any `.gitignore` says.
///
/// `GitIgnoreParser::is_ignored` compares every component of a path against
/// this list, so besides directory names it also carries a few well-known
/// file names (for example `.DS_Store` and `Thumbs.db`).
pub const ALWAYS_IGNORE_DIRS: &[&str] = &[
    // --- version-control metadata ---
    ".git",
    ".hg",
    ".svn",
    ".bzr",
    "_darcs",
    ".fossil",
    // --- operating-system droppings ---
    ".DS_Store",
    ".Spotlight-V100",
    ".Trashes",
    "Thumbs.db",
    "desktop.ini",
    "$RECYCLE.BIN",
    // --- Python tooling caches ---
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".pytype",
    ".pyre",
    ".hypothesis",
    ".tox",
    ".nox",
    "cython_debug",
    ".eggs",
    // --- Node / JavaScript tooling ---
    "node_modules",
    ".npm",
    ".yarn",
    ".pnpm-store",
    ".next",
    ".nuxt",
    ".output",
    ".svelte-kit",
    ".angular",
    ".parcel-cache",
    ".turbo",
    // --- IDE and editor state ---
    ".idea",
    ".vscode",
    ".vs",
    ".settings",
    // --- Java / Kotlin ---
    ".gradle",
    // --- Elixir ---
    "_build",
    ".elixir_ls",
    // --- iOS ---
    "Pods",
    "DerivedData",
    "xcuserdata",
    // --- Ruby ---
    ".bundle",
    // --- virtual environments ---
    ".venv",
    "venv",
    // --- miscellaneous caches and scratch space ---
    ".cache",
    ".sass-cache",
    ".eslintcache",
    ".stylelintcache",
    ".tmp",
    ".temp",
    "tmp",
    "temp",
    // --- Rust build output ---
    "target",
];
81
/// One rule read from a `.gitignore` file, with its markers already split off.
#[derive(Clone, Debug)]
struct GitIgnorePattern {
    /// Pattern text with the leading `!` and any trailing `/` removed.
    pattern: String,
    /// `true` when the line began with `!`, i.e. a match re-includes the path.
    negated: bool,
    /// `true` when the line ended with `/`, i.e. the rule applies to directories only.
    dir_only: bool,
}
92
93/// A `.gitignore` spec loaded from a specific directory.
94#[derive(Debug, Clone)]
95struct GitIgnoreSpec {
96    /// Directory where this `.gitignore` was found.
97    base_dir: PathBuf,
98    /// Parsed patterns from the file.
99    patterns: Vec<GitIgnorePattern>,
100}
101
102/// GitIgnore parser that supports nested `.gitignore` files.
103pub struct GitIgnoreParser {
104    root_dir: PathBuf,
105    specs: Vec<GitIgnoreSpec>,
106}
107
108impl GitIgnoreParser {
109    /// Create a new parser rooted at the given directory.
110    pub fn new(root_dir: &Path) -> Self {
111        let root_dir = root_dir
112            .canonicalize()
113            .unwrap_or_else(|_| root_dir.to_path_buf());
114        let mut parser = Self {
115            root_dir,
116            specs: Vec::new(),
117        };
118        parser.load_gitignore_files();
119        parser
120    }
121
122    /// Check whether a path should be ignored.
123    pub fn is_ignored(&self, path: &Path) -> bool {
124        let abs_path = if path.is_absolute() {
125            path.to_path_buf()
126        } else {
127            self.root_dir.join(path)
128        };
129
130        let rel = match abs_path.strip_prefix(&self.root_dir) {
131            Ok(r) => r,
132            Err(_) => return false,
133        };
134
135        // Check always-ignored directories
136        for component in rel.components() {
137            let s = component.as_os_str().to_string_lossy();
138            if ALWAYS_IGNORE_DIRS.contains(&s.as_ref()) {
139                return true;
140            }
141        }
142
143        // Check gitignore patterns
144        let mut ignored = false;
145        for spec in &self.specs {
146            // Only apply spec if path is under spec's base dir
147            let spec_rel = match abs_path.strip_prefix(&spec.base_dir) {
148                Ok(r) => r,
149                Err(_) => continue,
150            };
151
152            let match_str = spec_rel.to_string_lossy().replace('\\', "/");
153            let is_dir = abs_path.is_dir();
154
155            for pat in &spec.patterns {
156                if pat.dir_only && !is_dir {
157                    continue;
158                }
159
160                if matches_pattern(&pat.pattern, &match_str) {
161                    ignored = !pat.negated;
162                }
163            }
164        }
165
166        ignored
167    }
168
169    /// Check if a directory name is in the always-ignore list.
170    pub fn is_always_ignored(name: &str) -> bool {
171        ALWAYS_IGNORE_DIRS.contains(&name)
172    }
173
174    fn load_gitignore_files(&mut self) {
175        // Load root .gitignore
176        let root_gitignore = self.root_dir.join(".gitignore");
177        if root_gitignore.exists()
178            && let Some(spec) = self.parse_gitignore(&root_gitignore, &self.root_dir.clone())
179        {
180            self.specs.push(spec);
181        }
182
183        // Walk subdirectories
184        self.walk_for_gitignores(&self.root_dir.clone());
185    }
186
187    fn walk_for_gitignores(&mut self, dir: &Path) {
188        let entries = match std::fs::read_dir(dir) {
189            Ok(e) => e,
190            Err(_) => return,
191        };
192
193        for entry in entries.flatten() {
194            let path = entry.path();
195            if !path.is_dir() {
196                continue;
197            }
198
199            let name = path
200                .file_name()
201                .map(|n| n.to_string_lossy().to_string())
202                .unwrap_or_default();
203
204            if ALWAYS_IGNORE_DIRS.contains(&name.as_str()) {
205                continue;
206            }
207
208            let gitignore = path.join(".gitignore");
209            if gitignore.exists()
210                && let Some(spec) = self.parse_gitignore(&gitignore, &path)
211            {
212                self.specs.push(spec);
213            }
214
215            self.walk_for_gitignores(&path);
216        }
217    }
218
219    fn parse_gitignore(&self, gitignore_path: &Path, base_dir: &Path) -> Option<GitIgnoreSpec> {
220        let content = std::fs::read_to_string(gitignore_path).ok()?;
221        let mut patterns = Vec::new();
222
223        for line in content.lines() {
224            let trimmed = line.trim();
225            if trimmed.is_empty() || trimmed.starts_with('#') {
226                continue;
227            }
228
229            let (pattern, negated) = if let Some(rest) = trimmed.strip_prefix('!') {
230                (rest.to_string(), true)
231            } else {
232                (trimmed.to_string(), false)
233            };
234
235            let dir_only = pattern.ends_with('/');
236            let pattern = if dir_only {
237                pattern.trim_end_matches('/').to_string()
238            } else {
239                pattern
240            };
241
242            patterns.push(GitIgnorePattern {
243                pattern,
244                negated,
245                dir_only,
246            });
247        }
248
249        if patterns.is_empty() {
250            debug!("No patterns in {}", gitignore_path.display());
251            return None;
252        }
253
254        Some(GitIgnoreSpec {
255            base_dir: base_dir.to_path_buf(),
256            patterns,
257        })
258    }
259}
260
/// Match a `.gitignore`-style glob against a `/`-separated relative path.
///
/// `pattern` is expected without the `!` prefix and trailing `/` marker.
/// Supported syntax: `*`, `?` and `**`.
///
/// * A pattern without any `/` is matched against the last path component
///   only, so it applies at every depth.
/// * A pattern with a leading `/` or an inner `/` is anchored: it must match
///   the whole path, component by component. `*` and `?` therefore never
///   span a `/`.
/// * A component that is exactly `**` matches zero or more whole components;
///   as the final component it requires at least one (`foo/**` matches what
///   is inside `foo`, not `foo` itself).
///
/// An empty pattern or an empty path never matches.
fn matches_pattern(pattern: &str, path: &str) -> bool {
    // A leading `/` only anchors the pattern; it is not part of any component.
    let (pattern, rooted) = match pattern.strip_prefix('/') {
        Some(rest) => (rest, true),
        None => (pattern, false),
    };

    if pattern.is_empty() || path.is_empty() {
        return false;
    }

    // No separator at all: the pattern names a file or directory at any level.
    if !rooted && !pattern.contains('/') {
        let file_name = path.rsplit('/').next().unwrap_or(path);
        return simple_match(pattern, file_name);
    }

    let mut pattern_segments: Vec<&str> = pattern.split('/').collect();
    // Trailing `**` means "everything inside": zero or more directories
    // followed by at least one more component.
    if pattern_segments.last() == Some(&"**") {
        pattern_segments.push("*");
    }
    let path_segments: Vec<&str> = path.split('/').collect();

    wildcard_match(
        &pattern_segments,
        &path_segments,
        |segment| *segment == "**",
        |segment, name| simple_match(segment, name),
    )
}

/// Simple wildcard matching on a single string.
///
/// `*` matches any run of characters (possibly empty) and `?` matches exactly
/// one character; every other character matches itself. No character is
/// special-cased here: `matches_pattern` only ever passes single path
/// components, which is what keeps wildcards from spanning `/`.
fn simple_match(pattern: &str, text: &str) -> bool {
    let p: Vec<char> = pattern.chars().collect();
    let t: Vec<char> = text.chars().collect();
    simple_match_impl(&p, &t)
}

/// Character-slice implementation behind [`simple_match`].
fn simple_match_impl(pattern: &[char], text: &[char]) -> bool {
    wildcard_match(
        pattern,
        text,
        |unit| *unit == '*',
        |unit, ch| *unit == '?' || unit == ch,
    )
}

/// Generic "star" matcher shared by the character and path-segment levels.
///
/// `is_star` identifies pattern units that match any run of text units
/// (including none); `unit_matches` compares one non-star pattern unit with
/// one text unit. The scan is iterative and only ever backtracks to the most
/// recent star, so it performs at most `pattern.len() * text.len()` unit
/// comparisons instead of the exponential blow-up of naive recursion.
fn wildcard_match<P, T>(
    pattern: &[P],
    text: &[T],
    is_star: impl Fn(&P) -> bool,
    unit_matches: impl Fn(&P, &T) -> bool,
) -> bool {
    let mut p = 0;
    let mut t = 0;
    // (pattern index just after the last star, text index that star extends to)
    let mut resume: Option<(usize, usize)> = None;

    while t < text.len() {
        if p < pattern.len() && is_star(&pattern[p]) {
            // Start by letting the star match nothing.
            p += 1;
            resume = Some((p, t));
        } else if p < pattern.len() && unit_matches(&pattern[p], &text[t]) {
            p += 1;
            t += 1;
        } else if let Some((after_star, covered)) = resume {
            // Mismatch: let the last star swallow one more text unit and retry.
            p = after_star;
            t = covered + 1;
            resume = Some((after_star, covered + 1));
        } else {
            return false;
        }
    }

    // Text exhausted: whatever pattern is left may only consist of stars.
    pattern[p..].iter().all(|unit| is_star(unit))
}
331
332impl std::fmt::Debug for GitIgnoreParser {
333    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
334        f.debug_struct("GitIgnoreParser")
335            .field("root_dir", &self.root_dir)
336            .field("specs_count", &self.specs.len())
337            .finish()
338    }
339}
340
341#[cfg(test)]
342#[path = "gitignore_tests.rs"]
343mod tests;