infiniloom_engine/
default_ignores.rs

1//! Default ignore patterns for Infiniloom
2//!
3//! These patterns are applied by default to exclude common non-essential files
4//! that waste tokens without adding value for LLM context.
5
/// Default patterns to ignore (dependencies, build outputs, etc.)
///
/// How the entries behave when checked with `matches_any`:
///
/// * A pattern is matched against the whole path. With the `glob` crate's
///   default match options `*` also crosses `/`, so extension patterns such as
///   `*.png` apply at any depth.
/// * Patterns that start with a literal name (`dist/**`, `.env`) only match
///   when that name is at the very start of the path.
/// * Patterns that start with `**/` match at the start of the path *and* after
///   any `/`. They are used for names that are unambiguous wherever they
///   occur (`node_modules`, dunder directories, lock files, `.DS_Store`), so
///   that nested packages in a monorepo are covered as well.
pub const DEFAULT_IGNORES: &[&str] = &[
    // === Dependencies ===
    "**/node_modules/**",
    "vendor/**",
    ".venv/**",
    "venv/**",
    "**/__pycache__/**",
    ".cache/**",
    ".npm/**",
    ".yarn/**",
    ".pnpm/**",
    "bower_components/**",
    "jspm_packages/**",
    // === Build outputs ===
    "dist/**",
    "build/**",
    "out/**",
    "target/**",
    "_build/**",
    ".next/**",
    ".nuxt/**",
    ".output/**",
    ".svelte-kit/**",
    ".vercel/**",
    ".netlify/**",
    // === Minified/bundled files ===
    "*.min.js",
    "*.min.css",
    "*.bundle.js",
    "*.chunk.js",
    "*.min.map",
    // Source maps of minified bundles are named `<file>.min.js.map` /
    // `<file>.min.css.map`, which `*.min.map` alone does not match.
    "*.min.js.map",
    "*.min.css.map",
    // === Generated code ===
    "*.generated.*",
    "*.pb.go",
    "*_generated.go",
    "*.g.dart",
    "generated/**",
    "*.gen.ts",
    "*.gen.js",
    "**/__generated__/**",
    // === Lock files (large, not useful for understanding) ===
    // Any depth: sub-projects and workspace members carry their own lock files.
    "**/package-lock.json",
    "**/yarn.lock",
    "**/pnpm-lock.yaml",
    "**/Cargo.lock",
    "**/poetry.lock",
    "**/Gemfile.lock",
    "**/composer.lock",
    "**/Pipfile.lock",
    "**/bun.lockb",
    "**/flake.lock",
    // === Assets (binary or not code) ===
    "*.svg",
    "*.png",
    "*.jpg",
    "*.jpeg",
    "*.gif",
    "*.ico",
    "*.webp",
    "*.avif",
    "*.bmp",
    "*.tiff",
    "*.psd",
    "*.ai",
    "*.sketch",
    "*.fig",
    "*.woff",
    "*.woff2",
    "*.ttf",
    "*.eot",
    "*.otf",
    "*.mp3",
    "*.mp4",
    "*.wav",
    "*.ogg",
    "*.webm",
    "*.mov",
    "*.avi",
    "*.mkv",
    "*.flv",
    "*.pdf",
    // === IDE/Editor ===
    ".idea/**",
    ".vscode/**",
    "*.swp",
    "*.swo",
    "*~",
    // Finder drops this file into every directory it touches.
    "**/.DS_Store",
    "Thumbs.db",
    "*.iml",
    // === Coverage/Reports ===
    "coverage/**",
    ".nyc_output/**",
    "htmlcov/**",
    ".coverage",
    "lcov.info",
    "*.lcov",
    // === Logs ===
    "*.log",
    "logs/**",
    "npm-debug.log*",
    "yarn-debug.log*",
    "yarn-error.log*",
    // === Temporary files ===
    "tmp/**",
    "temp/**",
    ".tmp/**",
    ".temp/**",
    // === Database files ===
    "*.db",
    "*.sqlite",
    "*.sqlite3",
    // === Large data files ===
    "*.csv",
    "*.parquet",
    "*.arrow",
    "*.feather",
    // === Snapshots (usually large, auto-generated) ===
    "**/__snapshots__/**",
    "*.snap",
    // === Type definition bundles ===
    "*.d.ts.map",
    // === WASM ===
    "*.wasm",
    // === Compiled Python ===
    "*.pyc",
    "*.pyo",
    "*.pyd",
    // === Misc ===
    ".git/**",
    ".hg/**",
    ".svn/**",
    ".env",
    ".env.*",
    "*.bak",
    "*.backup",
];
144
/// Glob patterns that identify test code and test support files.
///
/// Unlike `DEFAULT_IGNORES`, these are only applied when the caller asks for
/// tests to be left out (see `filter_default_ignores`). Every entry starts
/// with `**/`, so it applies at the root as well as in nested directories.
pub const TEST_IGNORES: &[&str] = &[
    // Test directories and test-named files
    "**/test/**",
    "**/tests/**",
    "**/__tests__/**",
    "**/spec/**",
    "**/specs/**",
    "**/*_test.*",
    "**/*.test.*",
    "**/*.spec.*",
    // Fixture files
    "**/*.fixture.*",
    "**/*_fixture.*",
    // Python-style test modules and pytest configuration
    "**/test_*.*",
    "**/conftest.py",
    // Fixture, mock and test-data directories
    "**/fixtures/**",
    "**/mocks/**",
    "**/__mocks__/**",
    "**/__fixtures__/**",
    "**/testdata/**",
    "**/test-data/**",
    "**/*_test/**",
    // Story files
    "**/*.stories.*",
    "**/*.story.*",
    // End-to-end and integration suites
    "**/e2e/**",
    "**/integration/**",
    "**/cypress/**",
    "**/playwright/**",
];
174
/// Glob patterns that identify documentation.
///
/// Only applied when the caller asks for documentation to be left out
/// (see `filter_default_ignores`).
pub const DOC_IGNORES: &[&str] = &[
    // Documentation directories
    "docs/**",
    "doc/**",
    "documentation/**",
    // Markup formats
    "*.md",
    "*.mdx",
    "*.rst",
    // Project meta files, with any suffix/extension
    "CHANGELOG*",
    "HISTORY*",
    "AUTHORS*",
    "CONTRIBUTORS*",
    "CONTRIBUTING*",
    "CODE_OF_CONDUCT*",
];
190
191/// Check if a path matches any of the given glob patterns
192pub fn matches_any(path: &str, patterns: &[&str]) -> bool {
193    for pattern in patterns {
194        if let Ok(glob) = glob::Pattern::new(pattern) {
195            if glob.matches(path) {
196                return true;
197            }
198        }
199        // Also check if pattern matches any path component
200        if let Some(suffix) = pattern.strip_prefix("**/") {
201            if let Ok(glob) = glob::Pattern::new(suffix) {
202                // Check against each component and suffix of path
203                for (i, _) in path.match_indices('/') {
204                    if glob.matches(&path[i + 1..]) {
205                        return true;
206                    }
207                }
208                if glob.matches(path) {
209                    return true;
210                }
211            }
212        }
213    }
214    false
215}
216
217/// Filter files based on default ignore patterns
218pub fn filter_default_ignores<'a>(
219    files: impl Iterator<Item = &'a str>,
220    include_tests: bool,
221    include_docs: bool,
222) -> Vec<&'a str> {
223    files
224        .filter(|path| {
225            // Always apply default ignores
226            if matches_any(path, DEFAULT_IGNORES) {
227                return false;
228            }
229
230            // Optionally filter tests
231            if !include_tests && matches_any(path, TEST_IGNORES) {
232                return false;
233            }
234
235            // Optionally filter docs
236            if !include_docs && matches_any(path, DOC_IGNORES) {
237                return false;
238            }
239
240            true
241        })
242        .collect()
243}
244
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every path in `paths` is matched by `patterns`.
    fn assert_all_ignored(paths: &[&str], patterns: &[&str]) {
        for path in paths {
            assert!(matches_any(path, patterns), "expected `{}` to match", path);
        }
    }

    /// Assert that no path in `paths` is matched by `patterns`.
    fn assert_none_ignored(paths: &[&str], patterns: &[&str]) {
        for path in paths {
            assert!(!matches_any(path, patterns), "expected `{}` not to match", path);
        }
    }

    #[test]
    fn test_default_ignores() {
        assert_all_ignored(
            &[
                "node_modules/foo/bar.js",
                "dist/bundle.js",
                "package-lock.json",
                "foo.min.js",
                "generated/types.ts",
            ],
            DEFAULT_IGNORES,
        );

        assert_none_ignored(&["src/index.ts", "lib/utils.py"], DEFAULT_IGNORES);
    }

    #[test]
    fn test_test_ignores() {
        assert_all_ignored(
            &[
                "src/__tests__/foo.test.ts",
                "tests/unit/test_foo.py",
                "spec/models/user_spec.rb",
                // Fixture file patterns (issue: .fixture.go files appearing in results)
                "pkg/tools/ReadFile.fixture.go",
                "internal/something_fixture.ts",
                "src/api.fixture.json",
                // E2E and integration patterns
                "e2e/login.spec.ts",
                "cypress/integration/app.cy.ts",
                "playwright/tests/smoke.ts",
            ],
            TEST_IGNORES,
        );

        assert_none_ignored(&["src/index.ts"], TEST_IGNORES);
    }

    #[test]
    fn test_filter() {
        let input = [
            "src/index.ts",
            "src/utils.ts",
            "node_modules/foo/index.js",
            "tests/test_main.py",
            "docs/README.md",
            "package-lock.json",
        ];

        // Tests are excluded, docs are kept.
        let survivors = filter_default_ignores(input.iter().copied(), false, true);
        let expected = vec!["src/index.ts", "src/utils.ts", "docs/README.md"];
        assert_eq!(survivors, expected);
    }
}