infiniloom_engine/
default_ignores.rs

1//! Default ignore patterns for Infiniloom
2//!
3//! These patterns are applied by default to exclude common non-essential files
4//! that waste tokens without adding value for LLM context.
5
/// Glob patterns that are always excluded (dependencies, build outputs, etc.).
///
/// [`filter_default_ignores`] applies this list unconditionally, before the
/// optional [`TEST_IGNORES`] and [`DOC_IGNORES`] lists. Each entry is compiled
/// with `glob::Pattern` by [`matches_any`].
pub const DEFAULT_IGNORES: &[&str] = &[
    // === Dependencies ===
    "node_modules/**",
    "vendor/**",
    ".venv/**",
    "venv/**",
    "__pycache__/**",
    ".cache/**",
    ".npm/**",
    ".yarn/**",
    ".pnpm/**",
    "bower_components/**",
    "jspm_packages/**",
    // === Build outputs ===
    "dist/**",
    "build/**",
    "out/**",
    "target/**",
    "_build/**",
    ".next/**",
    ".nuxt/**",
    ".output/**",
    ".svelte-kit/**",
    ".vercel/**",
    ".netlify/**",
    // === Minified/bundled files ===
    "*.min.js",
    "*.min.css",
    "*.bundle.js",
    "*.chunk.js",
    // NOTE(review): source maps are commonly named `*.js.map` / `*.css.map`;
    // confirm that `*.min.map` is the intended spelling.
    "*.min.map",
    // === Generated code ===
    "*.generated.*",
    "*.pb.go",
    "*_generated.go",
    "*.g.dart",
    "generated/**",
    "*.gen.ts",
    "*.gen.js",
    "__generated__/**",
    // === Lock files (large, not useful for understanding) ===
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    "Cargo.lock",
    "poetry.lock",
    "Gemfile.lock",
    "composer.lock",
    "Pipfile.lock",
    "bun.lockb",
    "flake.lock",
    // === Assets (binary or not code) ===
    "*.svg",
    "*.png",
    "*.jpg",
    "*.jpeg",
    "*.gif",
    "*.ico",
    "*.webp",
    "*.avif",
    "*.bmp",
    "*.tiff",
    "*.psd",
    "*.ai",
    "*.sketch",
    "*.fig",
    "*.woff",
    "*.woff2",
    "*.ttf",
    "*.eot",
    "*.otf",
    "*.mp3",
    "*.mp4",
    "*.wav",
    "*.ogg",
    "*.webm",
    "*.mov",
    "*.avi",
    "*.mkv",
    "*.flv",
    "*.pdf",
    // === IDE/Editor ===
    ".idea/**",
    ".vscode/**",
    "*.swp",
    "*.swo",
    "*~",
    ".DS_Store",
    "Thumbs.db",
    "*.iml",
    // === Coverage/Reports ===
    "coverage/**",
    ".nyc_output/**",
    "htmlcov/**",
    ".coverage",
    "lcov.info",
    "*.lcov",
    // === Logs ===
    "*.log",
    "logs/**",
    "npm-debug.log*",
    "yarn-debug.log*",
    "yarn-error.log*",
    // === Temporary files ===
    "tmp/**",
    "temp/**",
    ".tmp/**",
    ".temp/**",
    // === Database files ===
    "*.db",
    "*.sqlite",
    "*.sqlite3",
    // === Large data files ===
    "*.csv",
    "*.parquet",
    "*.arrow",
    "*.feather",
    // === Snapshots (usually large, auto-generated) ===
    "__snapshots__/**",
    "*.snap",
    // === Type definition bundles ===
    "*.d.ts.map",
    // === WASM ===
    "*.wasm",
    // === Compiled Python ===
    "*.pyc",
    "*.pyo",
    "*.pyd",
    // === Misc ===
    ".git/**",
    ".hg/**",
    ".svn/**",
    ".env",
    ".env.*",
    "*.bak",
    "*.backup",
];
144
/// Glob patterns for test code, fixtures, mocks and stories.
///
/// These are optional: [`filter_default_ignores`] applies them only when its
/// `include_tests` argument is `false`. Every entry starts with `**/`, which
/// [`matches_any`] additionally handles by trying the remainder of the pattern
/// against each `/`-delimited suffix of the path.
pub const TEST_IGNORES: &[&str] = &[
    "**/test/**",
    "**/tests/**",
    "**/__tests__/**",
    "**/spec/**",
    "**/specs/**",
    "**/*_test.*",
    "**/*.test.*",
    "**/*.spec.*",
    "**/test_*.*",
    "**/conftest.py",
    "**/fixtures/**",
    "**/mocks/**",
    "**/__mocks__/**",
    "**/__fixtures__/**",
    "**/testdata/**",
    "**/test-data/**",
    "**/*_test/**",
    "**/*.stories.*",
    "**/*.story.*",
];
167
/// Glob patterns for documentation directories and prose files.
///
/// These are optional: [`filter_default_ignores`] applies them only when its
/// `include_docs` argument is `false`.
pub const DOC_IGNORES: &[&str] = &[
    "docs/**",
    "doc/**",
    "documentation/**",
    "*.md",
    "*.mdx",
    "*.rst",
    "CHANGELOG*",
    "HISTORY*",
    "AUTHORS*",
    "CONTRIBUTORS*",
    "CONTRIBUTING*",
    "CODE_OF_CONDUCT*",
];
183
184/// Check if a path matches any of the given glob patterns
185pub fn matches_any(path: &str, patterns: &[&str]) -> bool {
186    for pattern in patterns {
187        if let Ok(glob) = glob::Pattern::new(pattern) {
188            if glob.matches(path) {
189                return true;
190            }
191        }
192        // Also check if pattern matches any path component
193        if let Some(suffix) = pattern.strip_prefix("**/") {
194            if let Ok(glob) = glob::Pattern::new(suffix) {
195                // Check against each component and suffix of path
196                for (i, _) in path.match_indices('/') {
197                    if glob.matches(&path[i + 1..]) {
198                        return true;
199                    }
200                }
201                if glob.matches(path) {
202                    return true;
203                }
204            }
205        }
206    }
207    false
208}
209
210/// Filter files based on default ignore patterns
211pub fn filter_default_ignores<'a>(
212    files: impl Iterator<Item = &'a str>,
213    include_tests: bool,
214    include_docs: bool,
215) -> Vec<&'a str> {
216    files
217        .filter(|path| {
218            // Always apply default ignores
219            if matches_any(path, DEFAULT_IGNORES) {
220                return false;
221            }
222
223            // Optionally filter tests
224            if !include_tests && matches_any(path, TEST_IGNORES) {
225                return false;
226            }
227
228            // Optionally filter docs
229            if !include_docs && matches_any(path, DOC_IGNORES) {
230                return false;
231            }
232
233            true
234        })
235        .collect()
236}
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Dependency, build-output, lock-file, minified and generated paths are
    /// ignored by default; ordinary source files are not.
    #[test]
    fn test_default_ignores() {
        let ignored = [
            "node_modules/foo/bar.js",
            "dist/bundle.js",
            "package-lock.json",
            "foo.min.js",
            "generated/types.ts",
        ];
        for path in ignored.iter() {
            assert!(matches_any(path, DEFAULT_IGNORES), "`{}` should be ignored", path);
        }

        let kept = ["src/index.ts", "lib/utils.py"];
        for path in kept.iter() {
            assert!(!matches_any(path, DEFAULT_IGNORES), "`{}` should be kept", path);
        }
    }

    /// Test directories and test-named files match the test list; plain
    /// source files do not.
    #[test]
    fn test_test_ignores() {
        let test_paths = [
            "src/__tests__/foo.test.ts",
            "tests/unit/test_foo.py",
            "spec/models/user_spec.rb",
        ];
        for path in test_paths.iter() {
            assert!(matches_any(path, TEST_IGNORES), "`{}` should be a test path", path);
        }

        assert!(!matches_any("src/index.ts", TEST_IGNORES));
    }

    /// With tests excluded and docs included, only source and doc files remain.
    #[test]
    fn test_filter() {
        let input = [
            "src/index.ts",
            "src/utils.ts",
            "node_modules/foo/index.js",
            "tests/test_main.py",
            "docs/README.md",
            "package-lock.json",
        ];

        let survivors = filter_default_ignores(input.iter().copied(), false, true);
        let expected = vec!["src/index.ts", "src/utils.ts", "docs/README.md"];
        assert_eq!(survivors, expected);
    }
}