Skip to main content

sqry_core/config/
project_config.rs

1//! Project-level configuration for sqry
2//!
3//! Implements configuration loading per `02_DESIGN.md` \[M3\]:
4//! - `.sqry-config.toml` search from `index_root` up to filesystem root
5//! - Supports ignore patterns, language hints, indexing limits
6//! - Error handling with graceful fallback to defaults
7//!
8//! # Configuration File Location
9//!
10//! The config file is searched starting from `index_root` and walking UP:
11//! 1. Start at `index_root`
12//! 2. Check for `.sqry-config.toml` in current directory
13//! 3. If found: load and stop; else move to parent
14//! 4. Continue until filesystem root or found
15//! 5. If not found: use default configuration
16//!
17//! # What Configuration Does NOT Control
18//!
19//! Per `PROJECT_ROOT_SPEC.md` Section 4.2:
20//! - Index root selection (always CLI path or mode-derived)
21//! - `ProjectRootMode` (LSP setting only)
22//! - Project boundaries (determined by mode)
23
24use globset::{Glob, GlobSet, GlobSetBuilder};
25use serde::{Deserialize, Serialize};
26use std::collections::HashMap;
27use std::path::{Path, PathBuf};
28use thiserror::Error;
29
/// File name of the per-project configuration file.
///
/// This name is joined onto each directory visited while searching upward
/// from the index root, and it is echoed in the warnings logged when a
/// discovered file cannot be read or parsed.
pub const CONFIG_FILE_NAME: &str = ".sqry-config.toml";
32
33/// Errors that can occur when loading configuration
34#[derive(Debug, Error)]
35pub enum ConfigError {
36    /// Config file not found in ancestry
37    #[error("Configuration file not found")]
38    NotFound,
39
40    /// Config file found but failed to parse
41    #[error("Failed to parse config at {0}: {1}")]
42    ParseError(PathBuf, String),
43
44    /// IO error reading config file
45    #[error("Failed to read config at {0}: {1}")]
46    IoError(PathBuf, std::io::Error),
47}
48
49/// Project-level configuration
50///
51/// Loaded from `.sqry-config.toml` with defaults for missing fields.
52/// See `02_DESIGN.md` \[M3\] for full specification.
53#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
54#[serde(default)]
55pub struct ProjectConfig {
56    /// Ignore patterns configuration
57    #[serde(default)]
58    pub ignore: IgnoreConfig,
59
60    /// Include patterns (override ignores)
61    #[serde(default)]
62    pub include: IncludeConfig,
63
64    /// Language detection hints
65    #[serde(default)]
66    pub languages: LanguageConfig,
67
68    /// Indexing behavior configuration
69    #[serde(default)]
70    pub indexing: IndexingConfig,
71
72    /// Cache configuration
73    #[serde(default)]
74    pub cache: CacheConfig,
75}
76
77/// Ignore patterns configuration
78///
79/// These paths are excluded from indexing.
80/// Uses gitignore-style syntax.
81#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
82#[serde(default)]
83pub struct IgnoreConfig {
84    /// Glob patterns to ignore (gitignore syntax)
85    ///
86    /// Default patterns include:
87    /// - `node_modules/**`
88    /// - `target/**`
89    /// - `dist/**`
90    /// - `*.min.js`
91    /// - `vendor/**`
92    /// - `.git/**`
93    #[serde(default = "default_ignore_patterns")]
94    pub patterns: Vec<String>,
95}
96
97impl Default for IgnoreConfig {
98    fn default() -> Self {
99        Self {
100            patterns: default_ignore_patterns(),
101        }
102    }
103}
104
/// Returns the built-in ignore patterns as owned strings.
///
/// The list covers common dependency, build-output, VCS, Python cache /
/// virtualenv and editor directories, plus minified JavaScript files.
fn default_ignore_patterns() -> Vec<String> {
    const BUILT_IN: &[&str] = &[
        "node_modules/**",
        "target/**",
        "dist/**",
        "*.min.js",
        "vendor/**",
        ".git/**",
        "__pycache__/**",
        ".pytest_cache/**",
        ".mypy_cache/**",
        ".tox/**",
        ".venv/**",
        "venv/**",
        ".gradle/**",
        ".idea/**",
        ".vs/**",
        ".vscode/**",
    ];

    BUILT_IN.iter().map(|glob| (*glob).to_owned()).collect()
}
125
126/// Include patterns (override ignores)
127#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
128#[serde(default)]
129pub struct IncludeConfig {
130    /// Glob patterns to include even if they match ignore patterns
131    ///
132    /// Example: `vendor/internal/**` to include internal vendor code
133    #[serde(default)]
134    pub patterns: Vec<String>,
135}
136
137/// Language detection hints
138#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
139#[serde(default)]
140pub struct LanguageConfig {
141    /// Map file extensions to language IDs
142    ///
143    /// Example: `{ "jsx" = "javascript", "tsx" = "typescript" }`
144    #[serde(default)]
145    pub extensions: HashMap<String, String>,
146
147    /// Map specific filenames to language IDs
148    ///
149    /// Example: `{ "Jenkinsfile" = "groovy", "Dockerfile.*" = "dockerfile" }`
150    #[serde(default)]
151    pub files: HashMap<String, String>,
152}
153
154/// Indexing behavior configuration
155#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
156#[serde(default)]
157pub struct IndexingConfig {
158    /// Maximum file size to index (bytes)
159    ///
160    /// Default: 10 MB (`10_485_760` bytes)
161    #[serde(default = "default_max_file_size")]
162    pub max_file_size: u64,
163
164    /// Maximum depth for directory traversal
165    ///
166    /// Default: 100
167    #[serde(default = "default_max_depth")]
168    pub max_depth: u32,
169
170    /// Enable scope extraction
171    ///
172    /// Default: true
173    #[serde(default = "default_true")]
174    pub enable_scope_extraction: bool,
175
176    /// Enable relation extraction
177    ///
178    /// Default: true
179    #[serde(default = "default_true")]
180    pub enable_relation_extraction: bool,
181
182    /// Custom directories to ignore during repo detection
183    ///
184    /// Extends the default ignored directories list.
185    /// Default: empty (uses `DEFAULT_IGNORED_DIRS` from `path_utils`)
186    #[serde(default)]
187    pub additional_ignored_dirs: Vec<String>,
188}
189
190impl Default for IndexingConfig {
191    fn default() -> Self {
192        Self {
193            max_file_size: default_max_file_size(),
194            max_depth: default_max_depth(),
195            enable_scope_extraction: true,
196            enable_relation_extraction: true,
197            additional_ignored_dirs: Vec::new(),
198        }
199    }
200}
201
/// Default for `IndexingConfig::max_file_size`: 10 MB expressed in bytes.
fn default_max_file_size() -> u64 {
    const BYTES_PER_MIB: u64 = 1024 * 1024;
    10 * BYTES_PER_MIB
}
205
/// Default for `IndexingConfig::max_depth`.
fn default_max_depth() -> u32 {
    const MAX_TRAVERSAL_DEPTH: u32 = 100;
    MAX_TRAVERSAL_DEPTH
}
209
/// Serde default provider for boolean switches that are on unless the
/// configuration file turns them off.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
213
214/// Cache configuration
215#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
216#[serde(default)]
217pub struct CacheConfig {
218    /// Directory for cache files (relative to `index_root`)
219    ///
220    /// Default: ".sqry-cache"
221    #[serde(default = "default_cache_directory")]
222    pub directory: String,
223
224    /// Enable persistent cache
225    ///
226    /// Default: true
227    #[serde(default = "default_true")]
228    pub persistent: bool,
229}
230
231impl Default for CacheConfig {
232    fn default() -> Self {
233        Self {
234            directory: default_cache_directory(),
235            persistent: true,
236        }
237    }
238}
239
/// Default for `CacheConfig::directory`.
fn default_cache_directory() -> String {
    String::from(".sqry-cache")
}
243
244impl ProjectConfig {
245    /// Load configuration from a TOML file
246    ///
247    /// # Arguments
248    ///
249    /// * `path` - Path to the config file
250    ///
251    /// # Errors
252    ///
253    /// Returns `ConfigError` if file cannot be read or parsed
254    pub fn load<P: AsRef<Path>>(path: P) -> Result<Self, ConfigError> {
255        let path = path.as_ref();
256
257        let contents = std::fs::read_to_string(path)
258            .map_err(|e| ConfigError::IoError(path.to_path_buf(), e))?;
259
260        toml::from_str(&contents)
261            .map_err(|e| ConfigError::ParseError(path.to_path_buf(), e.to_string()))
262    }
263
264    /// Load configuration by searching from `index_root` up to filesystem root
265    ///
266    /// Implements the search strategy from `02_DESIGN.md` \[M3\]:
267    /// 1. Start at `index_root`
268    /// 2. Check for `.sqry-config.toml`
269    /// 3. If found: load and return
270    /// 4. If not: move to parent and repeat
271    /// 5. If reaching root without finding: return default config
272    ///
273    /// # Arguments
274    ///
275    /// * `index_root` - The project's index root path
276    ///
277    /// # Returns
278    ///
279    /// Configuration loaded from file or default.
280    /// Errors are logged but return default config.
281    pub fn load_from_index_root<P: AsRef<Path>>(index_root: P) -> Self {
282        match Self::try_load_from_index_root(index_root.as_ref()) {
283            Ok(config) => config,
284            Err(ConfigError::NotFound) => {
285                // Silent: use defaults
286                Self::default()
287            }
288            Err(ConfigError::ParseError(path, err)) => {
289                log::warn!(
290                    "Malformed {} at {}: {}. Using defaults.",
291                    CONFIG_FILE_NAME,
292                    path.display(),
293                    err
294                );
295                Self::default()
296            }
297            Err(ConfigError::IoError(path, err)) => {
298                log::warn!(
299                    "Cannot read {} at {}: {}. Using defaults.",
300                    CONFIG_FILE_NAME,
301                    path.display(),
302                    err
303                );
304                Self::default()
305            }
306        }
307    }
308
309    /// Try to load configuration, returning error on failure
310    ///
311    /// Unlike `load_from_index_root`, this returns the error instead of
312    /// falling back to defaults.
313    fn try_load_from_index_root(index_root: &Path) -> Result<Self, ConfigError> {
314        let mut current = index_root;
315
316        loop {
317            let config_path = current.join(CONFIG_FILE_NAME);
318
319            if config_path.exists() {
320                return Self::load(&config_path);
321            }
322
323            // Move to parent directory
324            match current.parent() {
325                Some(parent) if !parent.as_os_str().is_empty() => {
326                    current = parent;
327                }
328                _ => break, // Reached filesystem root
329            }
330        }
331
332        Err(ConfigError::NotFound)
333    }
334
335    /// Get the effective ignored directories list
336    ///
337    /// Combines the default ignored directories with any additional
338    /// directories specified in the configuration.
339    #[must_use]
340    pub fn effective_ignored_dirs(&self) -> Vec<&str> {
341        use crate::project::path_utils::DEFAULT_IGNORED_DIRS;
342
343        let mut dirs: Vec<&str> = DEFAULT_IGNORED_DIRS.to_vec();
344
345        // Add any additional ignored dirs from config
346        for dir in &self.indexing.additional_ignored_dirs {
347            dirs.push(dir.as_str());
348        }
349
350        dirs
351    }
352
353    /// Check if a path matches any ignore pattern
354    ///
355    /// Returns true if the path should be ignored during indexing.
356    /// Uses gitignore-style semantics:
357    /// - Patterns like `node_modules/**` match anywhere in the path
358    /// - Include patterns override ignore patterns
359    ///
360    /// # Arguments
361    ///
362    /// * `path` - Path to check (can be absolute or relative)
363    ///
364    /// # Returns
365    ///
366    /// `true` if the path should be ignored, `false` otherwise
367    #[must_use]
368    pub fn is_ignored(&self, path: &Path) -> bool {
369        // Normalize path for consistent matching across platforms
370        let normalized = normalize_path_for_matching(path);
371
372        // Build ignore matcher (lazily - could be cached for performance)
373        let ignore_set = match build_glob_set(&self.ignore.patterns) {
374            Ok(set) => set,
375            Err(e) => {
376                log::warn!("Invalid ignore pattern: {e}");
377                return false;
378            }
379        };
380
381        // Check if path matches any ignore pattern
382        if ignore_set.is_match(&normalized) {
383            // Check if included (overrides ignore)
384            if !self.include.patterns.is_empty() {
385                let include_set = match build_glob_set(&self.include.patterns) {
386                    Ok(set) => set,
387                    Err(e) => {
388                        log::warn!("Invalid include pattern: {e}");
389                        return true; // Still ignored if include patterns are invalid
390                    }
391                };
392
393                if include_set.is_match(&normalized) {
394                    return false; // Include overrides ignore
395                }
396            }
397            return true;
398        }
399
400        false
401    }
402
403    /// Get the language ID for a file path based on configuration hints
404    ///
405    /// Returns the configured language if found, or None to use default detection.
406    #[must_use]
407    pub fn language_for_path(&self, path: &Path) -> Option<&str> {
408        // Check filename mappings first
409        if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
410            for (pattern, lang) in &self.languages.files {
411                if glob_match_filename(pattern, filename) {
412                    return Some(lang.as_str());
413                }
414            }
415        }
416
417        // Check extension mappings
418        if let Some(ext) = path.extension().and_then(|e| e.to_str())
419            && let Some(lang) = self.languages.extensions.get(ext)
420        {
421            return Some(lang.as_str());
422        }
423
424        None
425    }
426}
427
428/// Build a `GlobSet` from a list of patterns
429///
430/// For gitignore-style matching, patterns without a leading `/` are converted
431/// to match anywhere in the path by prepending `**/`.
432///
433/// # Arguments
434///
435/// * `patterns` - List of glob patterns (gitignore syntax)
436///
437/// # Returns
438///
439/// A compiled `GlobSet` for efficient matching, or an error if patterns are invalid
440fn build_glob_set(patterns: &[String]) -> Result<GlobSet, globset::Error> {
441    let mut builder = GlobSetBuilder::new();
442
443    for pattern in patterns {
444        // Normalize pattern for gitignore-style matching (may expand to multiple patterns)
445        for normalized in normalize_gitignore_pattern(pattern) {
446            let glob = Glob::new(&normalized)?;
447            builder.add(glob);
448        }
449    }
450
451    builder.build()
452}
453
454/// Normalize a gitignore-style pattern for globset matching
455///
456/// Gitignore semantics:
457/// - `/build` → rooted pattern, matches `build/` at root and all contents
458/// - `docs/*.md` → contains `/`, so root-relative (NOT `**/docs/*.md`)
459/// - `node_modules` → no `/`, matches anywhere in tree (`**/node_modules`)
460/// - `*.bak` → no `/`, matches anywhere (`**/*.bak`)
461/// - `build/` → trailing `/` only, directory anywhere (`**/build/**`)
462///
463/// Returns a Vec because directory patterns expand to multiple globs
464/// (e.g., `/build` → `["build", "build/**"]` to match both entry and contents)
465fn normalize_gitignore_pattern(pattern: &str) -> Vec<String> {
466    // Handle rooted patterns (starting with `/`)
467    if let Some(stripped) = pattern.strip_prefix('/') {
468        return normalize_rooted_pattern(stripped);
469    }
470
471    // If pattern already starts with `**/`, use as-is
472    if pattern.starts_with("**/") {
473        return vec![pattern.to_string()];
474    }
475
476    // For checking root-relative patterns, we need to see if there's a path separator
477    // EXCLUDING trailing `/**` or `/` which are special suffixes, not path separators
478    // e.g., `node_modules/**` should match anywhere, but `docs/*.md` is root-relative
479    let pattern_core = pattern
480        .strip_suffix("/**")
481        .or_else(|| pattern.strip_suffix('/'))
482        .unwrap_or(pattern);
483
484    // If the core pattern contains `/`, it's a path pattern (root-relative per gitignore)
485    if pattern_core.contains('/') {
486        // Root-relative pattern like `docs/*.md` or `src/build/`
487        // Don't prepend `**/`
488        if pattern.ends_with('/') && !pattern.ends_with("/**") {
489            // Directory pattern with trailing `/` - match entry and contents
490            let dir_name = pattern.trim_end_matches('/');
491            return vec![dir_name.to_string(), format!("{dir_name}/**")];
492        }
493        return vec![pattern.to_string()];
494    }
495
496    // Simple pattern - matches anywhere in tree
497    // Handle different suffixes:
498    if pattern.ends_with("/**") {
499        // `node_modules/**` → `**/node_modules/**`
500        return vec![format!("**/{pattern}")];
501    }
502
503    if pattern.ends_with('/') {
504        // Directory pattern like `build/` → match `**/build` and `**/build/**`
505        let dir_name = pattern.trim_end_matches('/');
506        return vec![format!("**/{dir_name}"), format!("**/{dir_name}/**")];
507    }
508
509    // Simple name or extension pattern
510    vec![format!("**/{pattern}")]
511}
512
/// Expand a rooted pattern whose leading `/` has already been removed.
///
/// - A pattern ending in `/**` is returned unchanged.
/// - A pattern ending in `/` is a directory: returns the entry and `<entry>/**`.
/// - Otherwise the last path segment decides, heuristically, whether the
///   pattern names a file (returned unchanged) or a directory (entry plus
///   `<entry>/**`):
///   - a segment containing `*`, `?` or `[` is treated as a file pattern;
///   - a hidden segment (leading `.`) is a file when it ends in `ignore`,
///     `rc`, `attributes`, `modules`, or `config` (but is not exactly
///     `.config`), or when it contains another `.`; otherwise a directory
///     (`.git`, `.vscode`, `.config`);
///   - any other segment is a file when it has a non-empty extension after
///     its last `.`; otherwise a directory.
fn normalize_rooted_pattern(pattern: &str) -> Vec<String> {
    // Suffixes that mark a hidden name as a file rather than a directory.
    const FILE_LIKE_DOTFILE_SUFFIXES: [&str; 4] = ["ignore", "rc", "attributes", "modules"];

    let entry_only = || vec![pattern.to_owned()];

    if pattern.ends_with("/**") {
        return entry_only();
    }

    if pattern.ends_with('/') {
        let dir = pattern.trim_end_matches('/');
        return vec![dir.to_owned(), format!("{dir}/**")];
    }

    let leaf = pattern.rsplit(['/', '\\']).next().unwrap_or(pattern);

    // Wildcards in the final segment: treat as a file pattern.
    if leaf.contains(['*', '?', '[']) {
        return entry_only();
    }

    let is_file = match leaf.strip_prefix('.') {
        Some(rest) => {
            FILE_LIKE_DOTFILE_SUFFIXES
                .iter()
                .any(|suffix| rest.ends_with(*suffix))
                // `.gitconfig`, `.editorconfig` are files; `.config` is a directory
                || (rest != "config" && rest.ends_with("config"))
                // `.env.local`, `.npmrc.bak` carry an inner dot
                || rest.contains('.')
        }
        // A regular name counts as a file only with a non-empty extension.
        None => leaf
            .rfind('.')
            .is_some_and(|at| at > 0 && at < leaf.len() - 1),
    };

    if is_file {
        entry_only()
    } else {
        vec![pattern.to_owned(), format!("{pattern}/**")]
    }
}
576
577/// Simple filename-only glob matching for language configuration
578///
579/// Uses globset for proper glob semantics on just the filename component.
580fn glob_match_filename(pattern: &str, filename: &str) -> bool {
581    // For simple extension patterns like `*.jsx`, match against filename
582    match Glob::new(pattern) {
583        Ok(glob) => glob.compile_matcher().is_match(filename),
584        Err(_) => pattern == filename,
585    }
586}
587
/// Normalize a path into the form the ignore/include globs are matched against.
///
/// Steps:
/// 1. Convert to a string (lossily, for non-UTF-8 paths).
/// 2. Replace every `\` with `/` so Windows-style paths match the same globs.
/// 3. Strip every leading `/` and `./` so that
///    - unanchored patterns such as `**/node_modules/**` match absolute
///      paths like `/home/user/project/node_modules/foo`, and
///    - root-relative patterns such as `docs/*.md` match paths written as
///      `./docs/readme.md`.
///
/// Leading `..` components and hidden names such as `.git` are left intact.
fn normalize_path_for_matching(path: &Path) -> String {
    let unified = path.to_string_lossy().replace('\\', "/");

    let mut rest = unified.as_str();
    loop {
        // `"./"` only matches a current-directory component: `.git/…` and
        // `../…` do not start with it.
        if let Some(tail) = rest.strip_prefix("./") {
            rest = tail;
        } else if let Some(tail) = rest.strip_prefix('/') {
            rest = tail;
        } else {
            break;
        }
    }

    rest.to_owned()
}
606
607#[cfg(test)]
608mod tests {
609    use super::*;
610    use tempfile::TempDir;
611
612    #[test]
613    fn test_default_config() {
614        let config = ProjectConfig::default();
615
616        // Check defaults
617        assert!(!config.ignore.patterns.is_empty());
618        assert!(config.include.patterns.is_empty());
619        assert!(config.languages.extensions.is_empty());
620        assert_eq!(config.indexing.max_file_size, 10_485_760);
621        assert_eq!(config.indexing.max_depth, 100);
622        assert!(config.indexing.enable_scope_extraction);
623        assert!(config.indexing.enable_relation_extraction);
624        assert_eq!(config.cache.directory, ".sqry-cache");
625        assert!(config.cache.persistent);
626    }
627
628    #[test]
629    fn test_load_config_from_file() {
630        let temp = TempDir::new().unwrap();
631        let config_path = temp.path().join(CONFIG_FILE_NAME);
632
633        let toml_content = r#"
634[ignore]
635patterns = ["custom/**", "*.bak"]
636
637[include]
638patterns = ["custom/important/**"]
639
640[languages]
641extensions = { "jsx" = "javascript" }
642files = { "Jenkinsfile" = "groovy" }
643
644[indexing]
645max_file_size = 5242880
646max_depth = 50
647enable_scope_extraction = false
648additional_ignored_dirs = ["my_vendor"]
649
650[cache]
651directory = ".my-cache"
652persistent = false
653"#;
654
655        std::fs::write(&config_path, toml_content).unwrap();
656
657        let config = ProjectConfig::load(&config_path).unwrap();
658
659        assert_eq!(config.ignore.patterns, vec!["custom/**", "*.bak"]);
660        assert_eq!(config.include.patterns, vec!["custom/important/**"]);
661        assert_eq!(
662            config.languages.extensions.get("jsx"),
663            Some(&"javascript".to_string())
664        );
665        assert_eq!(
666            config.languages.files.get("Jenkinsfile"),
667            Some(&"groovy".to_string())
668        );
669        assert_eq!(config.indexing.max_file_size, 5_242_880);
670        assert_eq!(config.indexing.max_depth, 50);
671        assert!(!config.indexing.enable_scope_extraction);
672        assert_eq!(config.indexing.additional_ignored_dirs, vec!["my_vendor"]);
673        assert_eq!(config.cache.directory, ".my-cache");
674        assert!(!config.cache.persistent);
675    }
676
677    #[test]
678    fn test_load_config_ancestor_walk() {
679        let temp = TempDir::new().unwrap();
680
681        // Create nested directory structure
682        let nested = temp.path().join("level1/level2/level3");
683        std::fs::create_dir_all(&nested).unwrap();
684
685        // Create config at level1
686        let config_path = temp.path().join("level1").join(CONFIG_FILE_NAME);
687        std::fs::write(
688            &config_path,
689            r"
690[indexing]
691max_depth = 42
692",
693        )
694        .unwrap();
695
696        // Load from level3 - should find config at level1
697        let config = ProjectConfig::load_from_index_root(&nested);
698        assert_eq!(config.indexing.max_depth, 42);
699    }
700
701    #[test]
702    fn test_load_config_not_found_uses_defaults() {
703        let temp = TempDir::new().unwrap();
704
705        // No config file exists
706        let config = ProjectConfig::load_from_index_root(temp.path());
707
708        // Should use defaults
709        assert_eq!(config, ProjectConfig::default());
710    }
711
712    #[test]
713    fn test_partial_config_uses_defaults() {
714        let temp = TempDir::new().unwrap();
715        let config_path = temp.path().join(CONFIG_FILE_NAME);
716
717        // Only specify indexing section
718        std::fs::write(
719            &config_path,
720            r"
721[indexing]
722max_depth = 25
723",
724        )
725        .unwrap();
726
727        let config = ProjectConfig::load(&config_path).unwrap();
728
729        // Specified value
730        assert_eq!(config.indexing.max_depth, 25);
731
732        // Defaults for unspecified
733        assert_eq!(config.indexing.max_file_size, 10_485_760);
734        assert!(config.indexing.enable_scope_extraction);
735        assert_eq!(config.cache.directory, ".sqry-cache");
736    }
737
738    #[test]
739    fn test_effective_ignored_dirs() {
740        let mut config = ProjectConfig::default();
741        config.indexing.additional_ignored_dirs =
742            vec!["my_vendor".to_string(), "artifacts".to_string()];
743
744        let dirs = config.effective_ignored_dirs();
745
746        // Should include defaults
747        assert!(dirs.contains(&"node_modules"));
748        assert!(dirs.contains(&"target"));
749
750        // Should include custom
751        assert!(dirs.contains(&"my_vendor"));
752        assert!(dirs.contains(&"artifacts"));
753    }
754
755    #[test]
756    fn test_language_for_path() {
757        let mut config = ProjectConfig::default();
758        config
759            .languages
760            .extensions
761            .insert("jsx".to_string(), "javascript".to_string());
762        config
763            .languages
764            .files
765            .insert("Jenkinsfile".to_string(), "groovy".to_string());
766
767        // Extension match
768        assert_eq!(
769            config.language_for_path(Path::new("src/App.jsx")),
770            Some("javascript")
771        );
772
773        // Filename match
774        assert_eq!(
775            config.language_for_path(Path::new("ci/Jenkinsfile")),
776            Some("groovy")
777        );
778
779        // No match
780        assert_eq!(config.language_for_path(Path::new("src/main.rs")), None);
781    }
782
783    #[test]
784    fn test_glob_match_filename() {
785        // Single wildcard
786        assert!(glob_match_filename("*.js", "app.js"));
787        assert!(!glob_match_filename("*.js", "app.ts"));
788
789        // Question mark
790        assert!(glob_match_filename("file?.txt", "file1.txt"));
791        assert!(!glob_match_filename("file?.txt", "file12.txt"));
792
793        // Exact match
794        assert!(glob_match_filename("Jenkinsfile", "Jenkinsfile"));
795        assert!(!glob_match_filename("Jenkinsfile", "Jenkinsfile.bak"));
796    }
797
798    #[test]
799    fn test_is_ignored_basic() {
800        let config = ProjectConfig::default();
801
802        // Default patterns should ignore common dirs
803        assert!(config.is_ignored(Path::new("node_modules/foo.js")));
804        assert!(config.is_ignored(Path::new("target/debug/binary")));
805        assert!(config.is_ignored(Path::new(".git/config")));
806        assert!(config.is_ignored(Path::new("__pycache__/module.pyc")));
807
808        // Non-ignored paths
809        assert!(!config.is_ignored(Path::new("src/main.rs")));
810        assert!(!config.is_ignored(Path::new("lib/utils.js")));
811    }
812
813    #[test]
814    fn test_is_ignored_nested_paths() {
815        // Key fix: patterns like `node_modules/**` should match nested paths
816        let config = ProjectConfig::default();
817
818        // Nested node_modules should be ignored
819        assert!(config.is_ignored(Path::new("packages/frontend/node_modules/react/index.js")));
820        assert!(config.is_ignored(Path::new("deep/nested/path/node_modules/pkg/lib.js")));
821
822        // Nested target dirs should be ignored
823        assert!(config.is_ignored(Path::new("crates/lib/target/release/libfoo.so")));
824    }
825
826    #[test]
827    fn test_is_ignored_absolute_paths() {
828        // Key fix: patterns should match absolute paths too
829        let config = ProjectConfig::default();
830
831        // Absolute paths with ignored directories
832        assert!(config.is_ignored(Path::new("/home/user/project/node_modules/pkg/index.js")));
833        assert!(config.is_ignored(Path::new("/tmp/build/target/debug/app")));
834        assert!(config.is_ignored(Path::new("/var/repo/.git/objects/pack/abc")));
835    }
836
837    #[test]
838    fn test_is_ignored_include_overrides() {
839        let mut config = ProjectConfig::default();
840        config.ignore.patterns = vec!["vendor/**".to_string()];
841        config.include.patterns = vec!["vendor/internal/**".to_string()];
842
843        // vendor/** should be ignored
844        assert!(config.is_ignored(Path::new("vendor/external/lib.js")));
845        assert!(config.is_ignored(Path::new("vendor/third_party/pkg.py")));
846
847        // But vendor/internal/** should be included (override)
848        assert!(!config.is_ignored(Path::new("vendor/internal/core.rs")));
849        assert!(!config.is_ignored(Path::new("vendor/internal/nested/utils.rs")));
850    }
851
852    #[test]
853    fn test_is_ignored_extension_patterns() {
854        let mut config = ProjectConfig::default();
855        config.ignore.patterns = vec!["*.min.js".to_string(), "*.bak".to_string()];
856
857        // Extension patterns should work
858        assert!(config.is_ignored(Path::new("dist/app.min.js")));
859        assert!(config.is_ignored(Path::new("src/old.bak")));
860        assert!(config.is_ignored(Path::new("deeply/nested/file.min.js")));
861
862        // Normal files shouldn't be ignored
863        assert!(!config.is_ignored(Path::new("src/app.js")));
864    }
865
866    #[test]
867    fn test_normalize_gitignore_pattern() {
868        // Simple patterns without `/` get `**/` prepended (match anywhere)
869        assert_eq!(
870            normalize_gitignore_pattern("node_modules/**"),
871            vec!["**/node_modules/**"]
872        );
873        assert_eq!(normalize_gitignore_pattern("*.js"), vec!["**/*.js"]);
874        assert_eq!(normalize_gitignore_pattern("target"), vec!["**/target"]);
875
876        // Patterns starting with `**/` are unchanged
877        assert_eq!(
878            normalize_gitignore_pattern("**/node_modules"),
879            vec!["**/node_modules"]
880        );
881
882        // Rooted patterns (`/build`) expand to entry + contents (fixes HIGH finding)
883        assert_eq!(
884            normalize_gitignore_pattern("/build"),
885            vec!["build", "build/**"]
886        );
887        // Rooted patterns with `/**` already - just strip the `/`
888        assert_eq!(normalize_gitignore_pattern("/dist/**"), vec!["dist/**"]);
889
890        // Rooted file patterns (have `.`) don't get `/**` appended
891        assert_eq!(
892            normalize_gitignore_pattern("/config.json"),
893            vec!["config.json"]
894        );
895        assert_eq!(normalize_gitignore_pattern("/*.txt"), vec!["*.txt"]);
896
897        // Rooted directory with trailing `/` expands to entry + contents
898        assert_eq!(
899            normalize_gitignore_pattern("/build/"),
900            vec!["build", "build/**"]
901        );
902
903        // Patterns with `/` (not leading) are root-relative - NO `**/` prefix (fixes MEDIUM finding)
904        assert_eq!(normalize_gitignore_pattern("docs/*.md"), vec!["docs/*.md"]);
905        assert_eq!(
906            normalize_gitignore_pattern("src/vendor"),
907            vec!["src/vendor"]
908        );
909
910        // Directory patterns with trailing `/` only - match anywhere
911        assert_eq!(
912            normalize_gitignore_pattern("build/"),
913            vec!["**/build", "**/build/**"]
914        );
915    }
916
917    #[test]
918    fn test_build_glob_set() {
919        let patterns = vec!["node_modules/**".to_string(), "*.min.js".to_string()];
920        let glob_set = build_glob_set(&patterns).unwrap();
921
922        // Should match nested paths
923        assert!(glob_set.is_match("src/node_modules/pkg/index.js"));
924        assert!(glob_set.is_match("app.min.js"));
925        assert!(glob_set.is_match("dist/bundle.min.js"));
926
927        // Should not match non-matching paths
928        assert!(!glob_set.is_match("src/main.rs"));
929        assert!(!glob_set.is_match("app.js"));
930    }
931
932    #[test]
933    fn test_rooted_pattern_matches_contents() {
934        // Test for HIGH finding: `/build` should match `build/output.log`
935        let patterns = vec!["/build".to_string()];
936        let glob_set = build_glob_set(&patterns).unwrap();
937
938        // Should match the directory itself
939        assert!(glob_set.is_match("build"));
940        // Should match contents of the directory
941        assert!(glob_set.is_match("build/output.log"));
942        assert!(glob_set.is_match("build/subdir/file.txt"));
943
944        // Should NOT match elsewhere (rooted pattern)
945        assert!(!glob_set.is_match("src/build/output.log"));
946        assert!(!glob_set.is_match("packages/build"));
947    }
948
949    #[test]
950    fn test_slash_containing_patterns_are_root_relative() {
951        // Test for MEDIUM finding: `docs/*.md` should NOT match `packages/foo/docs/readme.md`
952        let patterns = vec!["docs/*.md".to_string()];
953        let glob_set = build_glob_set(&patterns).unwrap();
954
955        // Should match at root
956        assert!(glob_set.is_match("docs/readme.md"));
957        assert!(glob_set.is_match("docs/api.md"));
958
959        // Should NOT match in nested paths (false positives)
960        assert!(!glob_set.is_match("packages/foo/docs/readme.md"));
961        assert!(!glob_set.is_match("src/docs/notes.md"));
962    }
963
964    #[test]
965    fn test_simple_patterns_match_anywhere() {
966        // Simple patterns (no `/`) should match anywhere
967        let patterns = vec!["*.bak".to_string(), "node_modules".to_string()];
968        let glob_set = build_glob_set(&patterns).unwrap();
969
970        // *.bak matches anywhere
971        assert!(glob_set.is_match("file.bak"));
972        assert!(glob_set.is_match("src/file.bak"));
973        assert!(glob_set.is_match("deep/nested/path/file.bak"));
974
975        // node_modules matches anywhere
976        assert!(glob_set.is_match("node_modules"));
977        assert!(glob_set.is_match("packages/frontend/node_modules"));
978    }
979
980    #[test]
981    fn test_dotted_directories_expand_to_contents() {
982        // Test for iter3 MEDIUM finding: `.git`, `.sqry-cache` should be treated as directories
983        // not files, and expand to include contents
984
985        // /.git should match both the directory and its contents
986        assert_eq!(
987            normalize_gitignore_pattern("/.git"),
988            vec![".git", ".git/**"]
989        );
990        assert_eq!(
991            normalize_gitignore_pattern("/.sqry-cache"),
992            vec![".sqry-cache", ".sqry-cache/**"]
993        );
994        assert_eq!(
995            normalize_gitignore_pattern("/.hidden"),
996            vec![".hidden", ".hidden/**"]
997        );
998        // .config is XDG config directory, should expand (iter4 fix)
999        assert_eq!(
1000            normalize_gitignore_pattern("/.config"),
1001            vec![".config", ".config/**"]
1002        );
1003
1004        // *config dotfiles are files, not directories (iter5 fix)
1005        // .gitconfig, .editorconfig → file; .config → directory
1006        assert_eq!(
1007            normalize_gitignore_pattern("/.gitconfig"),
1008            vec![".gitconfig"]
1009        );
1010        assert_eq!(
1011            normalize_gitignore_pattern("/.editorconfig"),
1012            vec![".editorconfig"]
1013        );
1014
1015        // But files with actual extensions should NOT expand
1016        assert_eq!(
1017            normalize_gitignore_pattern("/.gitignore"),
1018            vec![".gitignore"]
1019        );
1020        assert_eq!(
1021            normalize_gitignore_pattern("/config.json"),
1022            vec!["config.json"]
1023        );
1024        assert_eq!(
1025            normalize_gitignore_pattern("/.env.local"),
1026            vec![".env.local"]
1027        );
1028
1029        // Verify via GlobSet that .git contents are matched
1030        let patterns = vec!["/.git".to_string()];
1031        let glob_set = build_glob_set(&patterns).unwrap();
1032
1033        assert!(glob_set.is_match(".git"));
1034        assert!(glob_set.is_match(".git/config"));
1035        assert!(glob_set.is_match(".git/objects/pack/abc123"));
1036        assert!(glob_set.is_match(".git/refs/heads/main"));
1037
1038        // But nested .git should NOT match (rooted pattern)
1039        assert!(!glob_set.is_match("submodule/.git"));
1040        assert!(!glob_set.is_match("packages/sub/.git/config"));
1041    }
1042
1043    #[test]
1044    fn test_rooted_patterns_with_relative_paths() {
1045        // Rooted patterns like `/build` work with relative paths (relative to project root)
1046        // For absolute paths, project root context would be needed to strip the prefix first
1047
1048        let mut config = ProjectConfig::default();
1049        config.ignore.patterns = vec!["/build".to_string(), "/.git".to_string()];
1050
1051        // Relative paths work correctly with rooted patterns
1052        assert!(config.is_ignored(Path::new("build/output.log")));
1053        assert!(config.is_ignored(Path::new("build")));
1054        assert!(config.is_ignored(Path::new(".git/config")));
1055        assert!(config.is_ignored(Path::new(".git")));
1056
1057        // Rooted patterns should NOT match in nested locations
1058        assert!(!config.is_ignored(Path::new("src/build/output.log")));
1059        assert!(!config.is_ignored(Path::new("packages/sub/build")));
1060        assert!(!config.is_ignored(Path::new("submodule/.git")));
1061    }
1062
1063    #[test]
1064    fn test_unrooted_patterns_with_absolute_paths() {
1065        // Unrooted patterns (no leading `/`) match anywhere, including absolute paths
1066        // The leading `/` from absolute paths is stripped for matching
1067
1068        let config = ProjectConfig::default();
1069        // Default patterns include `node_modules/**`, `.git/**`, etc. (unrooted)
1070
1071        // Absolute paths work because leading `/` is stripped and patterns have `**/`
1072        assert!(config.is_ignored(Path::new("/home/user/project/node_modules/pkg/index.js")));
1073        assert!(config.is_ignored(Path::new("/tmp/build/target/debug/app")));
1074        assert!(config.is_ignored(Path::new("/var/repo/.git/objects/pack/abc")));
1075
1076        // Relative paths also work
1077        assert!(config.is_ignored(Path::new("node_modules/pkg/index.js")));
1078        assert!(config.is_ignored(Path::new("target/debug/app")));
1079    }
1080
1081    #[test]
1082    fn test_normalize_path_for_matching() {
1083        // Verify the path normalization strips leading `/` and normalizes separators
1084        assert_eq!(
1085            normalize_path_for_matching(Path::new("/home/user/project/src/main.rs")),
1086            "home/user/project/src/main.rs"
1087        );
1088        assert_eq!(
1089            normalize_path_for_matching(Path::new("relative/path/file.rs")),
1090            "relative/path/file.rs"
1091        );
1092        assert_eq!(normalize_path_for_matching(Path::new("/build")), "build");
1093    }
1094}