// gid_core/ignore.rs

//! Ignore list support for GID.
//!
//! Similar to `.gitignore`: lets a project list patterns for paths to skip during extraction.
4
5use std::path::Path;
6use std::fs;
7use anyhow::Result;
8use regex::Regex;
9
/// Default patterns that are always ignored.
///
/// Entries use the same gitignore-style syntax as user-supplied patterns
/// (`*` wildcards are allowed) and are installed, in this order, by
/// `IgnoreList::with_defaults`.
pub const DEFAULT_IGNORES: &[&str] = &[
    // Dependency, build-output and VCS directories
    "node_modules",
    "target",
    ".git",
    "__pycache__",
    "venv",
    ".venv",
    "dist",
    "build",
    ".next",
    ".nuxt",
    // Tool caches and coverage data
    ".cache",
    ".pytest_cache",
    ".mypy_cache",
    "coverage",
    ".coverage",
    "*.egg-info",
    ".tox",
    ".nox",
    // Editor and OS metadata
    ".idea",
    ".vscode",
    ".DS_Store",
    "Thumbs.db",
    // Compiled artifacts
    "*.pyc",
    "*.pyo",
    "*.o",
    "*.a",
    "*.so",
    "*.dylib",
    "*.dll",
    "*.class",
    "*.jar",
    // Logs, temporary and editor swap/backup files
    "*.log",
    "*.tmp",
    "*.temp",
    "*.swp",
    "*.swo",
    "*~",
    // Vendored dependencies and CMake output
    "vendor",
    "deps",
    "_deps",
    "CMakeFiles",
    "cmake-build-*",
];
55
56/// A compiled ignore pattern.
57#[derive(Debug, Clone)]
58pub struct IgnorePattern {
59    /// Original pattern string
60    pub pattern: String,
61    /// Whether this is a negation pattern (starts with !)
62    pub negated: bool,
63    /// Whether this matches only directories (ends with /)
64    pub dir_only: bool,
65    /// Compiled regex for matching
66    regex: Regex,
67}
68
69impl IgnorePattern {
70    /// Create a new ignore pattern from a gitignore-style pattern string.
71    pub fn new(pattern: &str) -> Result<Self> {
72        let pattern = pattern.trim();
73        
74        // Handle negation
75        let (negated, pattern) = if pattern.starts_with('!') {
76            (true, &pattern[1..])
77        } else {
78            (false, pattern)
79        };
80        
81        // Handle directory-only pattern
82        let (dir_only, pattern) = if pattern.ends_with('/') {
83            (true, &pattern[..pattern.len() - 1])
84        } else {
85            (false, pattern)
86        };
87        
88        // Convert gitignore pattern to regex
89        let regex_pattern = gitignore_to_regex(pattern);
90        let regex = Regex::new(&regex_pattern)?;
91        
92        Ok(Self {
93            pattern: pattern.to_string(),
94            negated,
95            dir_only,
96            regex,
97        })
98    }
99    
100    /// Check if this pattern matches a path.
101    pub fn matches(&self, path: &str, is_dir: bool) -> bool {
102        // Directory-only patterns don't match files
103        if self.dir_only && !is_dir {
104            return false;
105        }
106        
107        // Try matching against the full path and just the filename
108        let filename = Path::new(path)
109            .file_name()
110            .map(|s| s.to_string_lossy())
111            .unwrap_or_default();
112        
113        self.regex.is_match(path) || self.regex.is_match(&filename)
114    }
115}
116
117/// A set of ignore patterns.
118#[derive(Debug, Clone, Default)]
119pub struct IgnoreList {
120    patterns: Vec<IgnorePattern>,
121}
122
123impl IgnoreList {
124    /// Create a new empty ignore list.
125    pub fn new() -> Self {
126        Self::default()
127    }
128    
129    /// Create an ignore list with default patterns.
130    pub fn with_defaults() -> Self {
131        let mut list = Self::new();
132        for pattern in DEFAULT_IGNORES {
133            if let Ok(p) = IgnorePattern::new(pattern) {
134                list.patterns.push(p);
135            }
136        }
137        list
138    }
139    
140    /// Add a pattern to the ignore list.
141    pub fn add(&mut self, pattern: &str) -> Result<()> {
142        let pattern = IgnorePattern::new(pattern)?;
143        self.patterns.push(pattern);
144        Ok(())
145    }
146    
147    /// Add multiple patterns.
148    pub fn add_patterns(&mut self, patterns: &[&str]) -> Result<()> {
149        for pattern in patterns {
150            self.add(pattern)?;
151        }
152        Ok(())
153    }
154    
155    /// Check if a path should be ignored.
156    pub fn should_ignore(&self, path: &str, is_dir: bool) -> bool {
157        let mut ignored = false;
158        
159        for pattern in &self.patterns {
160            if pattern.matches(path, is_dir) {
161                if pattern.negated {
162                    ignored = false;
163                } else {
164                    ignored = true;
165                }
166            }
167        }
168        
169        ignored
170    }
171    
172    /// Check if a path should be ignored (convenience method for paths).
173    pub fn is_ignored(&self, path: &Path) -> bool {
174        let path_str = path.to_string_lossy();
175        let is_dir = path.is_dir();
176        self.should_ignore(&path_str, is_dir)
177    }
178    
179    /// Get all patterns.
180    pub fn patterns(&self) -> &[IgnorePattern] {
181        &self.patterns
182    }
183}
184
185/// Load ignore patterns from a .gidignore file.
186pub fn load_ignore_list(project_dir: &Path) -> IgnoreList {
187    let mut list = IgnoreList::with_defaults();
188    
189    // Try to load .gidignore
190    let gidignore_path = project_dir.join(".gidignore");
191    if let Ok(content) = fs::read_to_string(&gidignore_path) {
192        parse_ignore_file(&content, &mut list);
193    }
194    
195    // Also respect .gitignore if it exists
196    let gitignore_path = project_dir.join(".gitignore");
197    if let Ok(content) = fs::read_to_string(&gitignore_path) {
198        parse_ignore_file(&content, &mut list);
199    }
200    
201    list
202}
203
204/// Parse an ignore file content and add patterns to the list.
205fn parse_ignore_file(content: &str, list: &mut IgnoreList) {
206    for line in content.lines() {
207        let line = line.trim();
208        
209        // Skip empty lines and comments
210        if line.is_empty() || line.starts_with('#') {
211            continue;
212        }
213        
214        // Add pattern (ignore errors for invalid patterns)
215        let _ = list.add(line);
216    }
217}
218
/// Convert a gitignore-style pattern to a regex pattern.
///
/// Translation rules:
///
/// * A leading `/` anchors the pattern at the start of the path (`^`). Every
///   other pattern may start at the beginning of the path or right after any
///   `/`, so it always lines up with a whole path component (`docs/api` does
///   not match `mydocs/api`).
/// * `*` matches any run of characters except `/`; `?` matches one character
///   except `/`.
/// * `**/` matches zero or more leading directories; any other `**` matches
///   anything, including `/`.
/// * `[...]` is a character class; `[!...]` (or `[^...]`) negates it. A `[`
///   without a closing `]` is matched literally.
/// * `\x` matches the character `x` literally; a trailing `\` matches a
///   backslash.
/// * Every other character matches itself.
///
/// The result always ends with `$`, so the pattern has to match through the
/// end of the path.
fn gitignore_to_regex(pattern: &str) -> String {
    /// Appends `c` to `out`, backslash-escaping it when regex syntax would
    /// otherwise give it a special meaning. Characters without a special
    /// meaning are appended bare so that e.g. an escaped `d` never turns into
    /// the `\d` digit class.
    fn push_literal(out: &mut String, c: char) {
        let is_meta = matches!(
            c,
            '\\' | '.'
                | '+'
                | '*'
                | '?'
                | '('
                | ')'
                | '|'
                | '['
                | ']'
                | '{'
                | '}'
                | '^'
                | '$'
                | '#'
                | '&'
                | '-'
                | '~'
        );
        if is_meta {
            out.push('\\');
        }
        out.push(c);
    }

    /// Translates a glob character class whose opening `[` has already been
    /// consumed. Returns the regex class including both brackets, or `None`
    /// when the input ends before the class is closed.
    fn translate_class<I>(chars: &mut std::iter::Peekable<I>) -> Option<String>
    where
        I: Iterator<Item = char>,
    {
        let mut class = String::from("[");

        // `[!...]` is the glob spelling of a negated class; regex wants `^`.
        if chars.peek() == Some(&'!') || chars.peek() == Some(&'^') {
            chars.next();
            class.push('^');
        }
        // A `]` directly after the opening is a member, not the terminator.
        if chars.peek() == Some(&']') {
            chars.next();
            class.push_str("\\]");
        }

        while let Some(c) = chars.next() {
            match c {
                ']' => {
                    class.push(']');
                    return Some(class);
                }
                '\\' => push_literal(&mut class, chars.next()?),
                '[' if chars.peek() == Some(&':') => {
                    // POSIX bracket expression such as `[:alpha:]`: copy it
                    // through unchanged, including its own closing `]`.
                    class.push('[');
                    loop {
                        let inner = chars.next()?;
                        class.push(inner);
                        if inner == ']' {
                            break;
                        }
                    }
                }
                // These would start a nested class or a set operation in
                // regex syntax; in a glob they are ordinary members.
                '[' | '&' | '~' | '^' => push_literal(&mut class, c),
                _ => class.push(c),
            }
        }
        None
    }

    // Patterns starting with / are anchored to the root
    let (anchored, body) = match pattern.strip_prefix('/') {
        Some(rest) => (true, rest),
        None => (false, pattern),
    };

    let mut regex = String::with_capacity(pattern.len() + 8);
    // Unanchored patterns may begin at any path-component boundary.
    regex.push_str(if anchored { "^" } else { "(^|/)" });

    let mut chars = body.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '*' => {
                if chars.peek() == Some(&'*') {
                    // ** matches everything including /
                    chars.next();
                    if chars.peek() == Some(&'/') {
                        chars.next();
                        regex.push_str("(.*/)?");
                    } else {
                        regex.push_str(".*");
                    }
                } else {
                    // * matches everything except /
                    regex.push_str("[^/]*");
                }
            }
            // ? matches any single character except /
            '?' => regex.push_str("[^/]"),
            '[' => {
                // Translate on a copy of the iterator so that an unterminated
                // class can fall back to a literal `[` without losing input.
                let mut lookahead = chars.clone();
                match translate_class(&mut lookahead) {
                    Some(class) => {
                        regex.push_str(&class);
                        chars = lookahead;
                    }
                    None => push_literal(&mut regex, '['),
                }
            }
            // A backslash makes the next character literal; at the very end
            // of the pattern it stands for itself.
            '\\' => push_literal(&mut regex, chars.next().unwrap_or('\\')),
            other => push_literal(&mut regex, other),
        }
    }

    regex.push('$');
    regex
}
297
/// Check if a path component should be ignored (quick check for common patterns).
///
/// `name` is compared for exact equality with a fixed set of well-known
/// directory and file names; no wildcard matching takes place. The set is
/// hard-coded here and is not derived from `DEFAULT_IGNORES`.
pub fn is_common_ignore(name: &str) -> bool {
    matches!(
        name,
        "node_modules"
            | "target"
            | ".git"
            | "__pycache__"
            | "venv"
            | ".venv"
            | "dist"
            | "build"
            | ".next"
            | ".nuxt"
            | ".cache"
            | ".pytest_cache"
            | ".mypy_cache"
            | "coverage"
            | ".idea"
            | ".vscode"
            | ".DS_Store"
            | "vendor"
            | "deps"
    )
}
308
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare name matches as a whole path component at any depth.
    #[test]
    fn test_pattern_simple() {
        let pattern = IgnorePattern::new("node_modules").unwrap();
        assert!(pattern.matches("node_modules", true));
        assert!(pattern.matches("foo/node_modules", true));
        assert!(!pattern.matches("my_node_modules", true));
    }

    /// `*` wildcards match within a single path component.
    #[test]
    fn test_pattern_wildcard() {
        let pattern = IgnorePattern::new("*.pyc").unwrap();
        assert!(pattern.matches("foo.pyc", false));
        assert!(pattern.matches("bar/foo.pyc", false));
        assert!(!pattern.matches("foo.py", false));
    }

    /// `**/` matches zero or more leading directories.
    #[test]
    fn test_pattern_doublestar() {
        let pattern = IgnorePattern::new("**/*.log").unwrap();
        assert!(pattern.matches("foo.log", false));
        assert!(pattern.matches("bar/foo.log", false));
        assert!(pattern.matches("a/b/c/foo.log", false));
    }

    /// A trailing `/` restricts the pattern to directories.
    #[test]
    fn test_pattern_dir_only() {
        let pattern = IgnorePattern::new("build/").unwrap();
        assert!(pattern.matches("build", true));
        assert!(!pattern.matches("build", false)); // Doesn't match files
    }

    /// A later `!` pattern re-includes a path ignored by an earlier pattern.
    #[test]
    fn test_pattern_negation() {
        let mut list = IgnoreList::new();
        list.add("*.log").unwrap();
        list.add("!important.log").unwrap();

        assert!(list.should_ignore("debug.log", false));
        assert!(!list.should_ignore("important.log", false));
    }

    /// The default list ignores well-known directories but not sources.
    #[test]
    fn test_default_ignores() {
        let list = IgnoreList::with_defaults();

        assert!(list.should_ignore("node_modules", true));
        assert!(list.should_ignore("target", true));
        assert!(list.should_ignore(".git", true));
        assert!(list.should_ignore("__pycache__", true));
        assert!(list.should_ignore("foo.pyc", false));

        assert!(!list.should_ignore("src", true));
        assert!(!list.should_ignore("main.rs", false));
    }

    /// The quick check recognises exact common names only.
    #[test]
    fn test_is_common_ignore() {
        assert!(is_common_ignore("node_modules"));
        assert!(is_common_ignore("target"));
        assert!(is_common_ignore(".git"));
        assert!(!is_common_ignore("src"));
        assert!(!is_common_ignore("main.rs"));
    }
}
376}