Skip to main content

ygrep_core/
config.rs

1use serde::{Deserialize, Serialize};
2use std::path::PathBuf;
3
/// Global ygrep configuration.
///
/// Groups the per-subsystem settings into a single serializable value.
/// Because of `#[serde(default)]`, any section missing from the
/// deserialized input is taken from this struct's `Default` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
    /// Daemon configuration
    pub daemon: DaemonConfig,

    /// Indexing configuration
    pub indexer: IndexerConfig,

    /// Search configuration
    pub search: SearchConfig,

    /// Output formatting
    pub output: OutputConfig,
}
20
/// Settings for the ygrep daemon.
///
/// Fields missing from the deserialized input fall back to the values in
/// `DaemonConfig::default()` (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct DaemonConfig {
    /// Socket path (default: $XDG_RUNTIME_DIR/ygrep/ygrep.sock or ~/.ygrep/ygrep.sock)
    ///
    /// `None` means no explicit path was configured; `Config::socket_path`
    /// then computes the default location.
    pub socket_path: Option<PathBuf>,

    /// Auto-shutdown after idle time (seconds, 0 = never). Defaults to 3600.
    pub idle_timeout: u64,

    /// Maximum concurrent index operations. Defaults to 4.
    pub max_concurrent_ops: usize,
}
33
/// Settings that control which files are indexed and how.
///
/// Fields missing from the deserialized input fall back to the values in
/// `IndexerConfig::default()` (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct IndexerConfig {
    /// Base directory for all index data.
    ///
    /// The default is resolved at construction time from `YGREP_HOME`,
    /// then `XDG_DATA_HOME`, then the platform data directory.
    pub data_dir: PathBuf,

    /// Maximum file size to index (bytes). Defaults to 10 MiB.
    pub max_file_size: u64,

    /// File extensions to include (empty = all text files). Defaults to empty.
    pub include_extensions: Vec<String>,

    /// Additional ignore patterns (glob syntax).
    ///
    /// The default is a built-in list covering dependency, build, cache,
    /// VCS, binary and media paths.
    pub ignore_patterns: Vec<String>,

    /// Follow symlinks. Defaults to `true`.
    pub follow_symlinks: bool,

    /// Respect .gitignore files (default: false for code search)
    pub respect_gitignore: bool,

    /// Enable content deduplication. Defaults to `true`.
    pub deduplicate: bool,

    /// Chunk size for semantic indexing (lines). Defaults to 50.
    pub chunk_size: usize,

    /// Chunk overlap (lines). Defaults to 10.
    ///
    /// NOTE(review): nothing in this module enforces
    /// `chunk_overlap < chunk_size` for user-supplied values — confirm the
    /// consumer validates it.
    pub chunk_overlap: usize,

    /// Number of indexing threads.
    ///
    /// Defaults to the available parallelism capped at 4, or 2 when the
    /// parallelism cannot be determined.
    pub threads: usize,
}
67
/// Settings for query execution and ranking.
///
/// Fields missing from the deserialized input fall back to the values in
/// `SearchConfig::default()` (container-level `#[serde(default)]`).
///
/// NOTE(review): the ranges quoted on the fields below are documentation
/// only; no validation is visible in this module — confirm where they are
/// enforced.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct SearchConfig {
    /// BM25 weight in hybrid search (0.0-1.0). Defaults to 0.5.
    pub bm25_weight: f32,

    /// Vector weight in hybrid search (0.0-1.0). Defaults to 0.5.
    pub vector_weight: f32,

    /// Default result limit. Defaults to 10.
    pub default_limit: usize,

    /// Maximum results. Defaults to 100.
    pub max_limit: usize,

    /// Minimum score threshold (0.0-1.0). Defaults to 0.1.
    pub min_score: f32,

    /// Enable fuzzy matching for BM25. Defaults to `true`.
    pub fuzzy_enabled: bool,

    /// Fuzzy distance (1-2). Defaults to 1.
    pub fuzzy_distance: u8,
}
92
/// Settings that shape how search results are printed.
///
/// Fields missing from the deserialized input fall back to the values in
/// `OutputConfig::default()` (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct OutputConfig {
    /// AI-optimized format (minimal tokens). Defaults to `true`.
    pub ai_mode: bool,

    /// Include file content snippets. Defaults to `true`.
    pub show_content: bool,

    /// Context lines around matches. Defaults to 2.
    pub context_lines: usize,

    /// Maximum output lines per result. Defaults to 10.
    pub max_lines_per_result: usize,

    /// Show scores in output. Defaults to `false`.
    pub show_scores: bool,
}
111
112impl Default for Config {
113    fn default() -> Self {
114        Self {
115            daemon: DaemonConfig::default(),
116            indexer: IndexerConfig::default(),
117            search: SearchConfig::default(),
118            output: OutputConfig::default(),
119        }
120    }
121}
122
123impl Default for DaemonConfig {
124    fn default() -> Self {
125        Self {
126            socket_path: None,
127            idle_timeout: 3600, // 1 hour
128            max_concurrent_ops: 4,
129        }
130    }
131}
132
133impl Default for IndexerConfig {
134    fn default() -> Self {
135        Self {
136            data_dir: default_data_dir(),
137            max_file_size: 10 * 1024 * 1024, // 10MB
138            include_extensions: vec![],
139            ignore_patterns: vec![
140                // Package managers & dependencies
141                "**/node_modules/**".into(),
142                "**/vendor/**".into(),
143                "**/.venv/**".into(),
144                "**/venv/**".into(),
145                "**/bower_components/**".into(),
146                // Build outputs
147                "**/target/**".into(),
148                "**/dist/**".into(),
149                "**/build/**".into(),
150                "**/out/**".into(),
151                "**/_build/**".into(),
152                "**/bin/**".into(),
153                "**/obj/**".into(),
154                // Cache directories
155                "**/cache/**".into(),
156                "**/.cache/**".into(),
157                "**/caches/**".into(),
158                "**/__pycache__/**".into(),
159                "**/.pytest_cache/**".into(),
160                "**/.mypy_cache/**".into(),
161                "**/.ruff_cache/**".into(),
162                "**/.phpunit.cache/**".into(),
163                "**/var/cache/**".into(),
164                // Log directories
165                "**/logs/**".into(),
166                "**/log/**".into(),
167                "**/*.log".into(),
168                // Temp directories
169                "**/tmp/**".into(),
170                "**/temp/**".into(),
171                "**/.tmp/**".into(),
172                // Version control
173                "**/.git/**".into(),
174                "**/.svn/**".into(),
175                "**/.hg/**".into(),
176                // IDE/Editor
177                "**/.idea/**".into(),
178                "**/.vscode/**".into(),
179                "**/.vs/**".into(),
180                "**/*.swp".into(),
181                "**/*.swo".into(),
182                // Lock files
183                "Cargo.lock".into(),
184                "package-lock.json".into(),
185                "yarn.lock".into(),
186                "pnpm-lock.yaml".into(),
187                "composer.lock".into(),
188                "Gemfile.lock".into(),
189                "poetry.lock".into(),
190                // Binary/compiled files
191                "**/*.pyc".into(),
192                "**/*.pyo".into(),
193                "**/*.class".into(),
194                "**/*.o".into(),
195                "**/*.so".into(),
196                "**/*.dylib".into(),
197                "**/*.dll".into(),
198                "**/*.exe".into(),
199                // Data files (often large)
200                "**/*.sqlite".into(),
201                "**/*.sqlite3".into(),
202                "**/*.db".into(),
203                // Coverage & test artifacts
204                "**/coverage/**".into(),
205                "**/.coverage/**".into(),
206                "**/htmlcov/**".into(),
207                "**/.nyc_output/**".into(),
208                // Images
209                "**/*.svg".into(),
210                "**/*.png".into(),
211                "**/*.jpg".into(),
212                "**/*.jpeg".into(),
213                "**/*.gif".into(),
214                "**/*.ico".into(),
215                "**/*.webp".into(),
216                "**/*.bmp".into(),
217                "**/*.tiff".into(),
218                "**/*.psd".into(),
219                // Fonts
220                "**/*.woff".into(),
221                "**/*.woff2".into(),
222                "**/*.ttf".into(),
223                "**/*.otf".into(),
224                "**/*.eot".into(),
225                // Media
226                "**/*.mp3".into(),
227                "**/*.mp4".into(),
228                "**/*.wav".into(),
229                "**/*.ogg".into(),
230                "**/*.webm".into(),
231                "**/*.avi".into(),
232                "**/*.mov".into(),
233                // Archives
234                "**/*.zip".into(),
235                "**/*.tar".into(),
236                "**/*.gz".into(),
237                "**/*.rar".into(),
238                "**/*.7z".into(),
239                // Documents (usually not code)
240                "**/*.pdf".into(),
241                "**/*.doc".into(),
242                "**/*.docx".into(),
243                "**/*.xls".into(),
244                "**/*.xlsx".into(),
245                "**/*.ppt".into(),
246                "**/*.pptx".into(),
247                // Minified/bundled files
248                "**/*.min.js".into(),
249                "**/*.min.css".into(),
250                "**/*.bundle.js".into(),
251                "**/*.chunk.js".into(),
252                // Source maps
253                "**/*.map".into(),
254            ],
255            follow_symlinks: true,
256            respect_gitignore: false,
257            deduplicate: true,
258            chunk_size: 50,
259            chunk_overlap: 10,
260            threads: std::thread::available_parallelism()
261                .map(|n| n.get().min(4))
262                .unwrap_or(2),
263        }
264    }
265}
266
267impl Default for SearchConfig {
268    fn default() -> Self {
269        Self {
270            bm25_weight: 0.5,
271            vector_weight: 0.5,
272            default_limit: 10,
273            max_limit: 100,
274            min_score: 0.1,
275            fuzzy_enabled: true,
276            fuzzy_distance: 1,
277        }
278    }
279}
280
281impl Default for OutputConfig {
282    fn default() -> Self {
283        Self {
284            ai_mode: true,
285            show_content: true,
286            context_lines: 2,
287            max_lines_per_result: 10,
288            show_scores: false,
289        }
290    }
291}
292
293fn default_data_dir() -> PathBuf {
294    // 1. YGREP_HOME — dedicated override, used as-is
295    if let Ok(ygrep_home) = std::env::var("YGREP_HOME") {
296        if !ygrep_home.is_empty() {
297            return PathBuf::from(ygrep_home);
298        }
299    }
300    // 2. XDG_DATA_HOME/ygrep
301    if let Ok(xdg_data) = std::env::var("XDG_DATA_HOME") {
302        if !xdg_data.is_empty() {
303            return PathBuf::from(xdg_data).join("ygrep");
304        }
305    }
306    // 3. Platform default
307    dirs::data_dir()
308        .unwrap_or_else(|| PathBuf::from("~/.local/share"))
309        .join("ygrep")
310}
311
312impl Config {
313    /// Load config from default locations (in order of precedence):
314    /// 1. $PWD/.ygrep.toml
315    /// 2. $XDG_CONFIG_HOME/ygrep/config.toml
316    /// 3. ~/.config/ygrep/config.toml
317    /// 4. Built-in defaults
318    pub fn load() -> Self {
319        // Try project-level config
320        if let Ok(content) = std::fs::read_to_string(".ygrep.toml") {
321            if let Ok(config) = toml::from_str(&content) {
322                return config;
323            }
324        }
325
326        // Try XDG_CONFIG_HOME first (even on macOS)
327        if let Ok(xdg_config) = std::env::var("XDG_CONFIG_HOME") {
328            if !xdg_config.is_empty() {
329                let config_path = PathBuf::from(&xdg_config).join("ygrep").join("config.toml");
330                if let Ok(content) = std::fs::read_to_string(&config_path) {
331                    if let Ok(config) = toml::from_str(&content) {
332                        return config;
333                    }
334                }
335            }
336        }
337
338        // Try platform default config dir
339        if let Some(config_dir) = dirs::config_dir() {
340            let config_path = config_dir.join("ygrep").join("config.toml");
341            if let Ok(content) = std::fs::read_to_string(&config_path) {
342                if let Ok(config) = toml::from_str(&content) {
343                    return config;
344                }
345            }
346        }
347
348        // Fall back to defaults
349        Self::default()
350    }
351
352    /// Load config from a specific file
353    pub fn load_from(path: &std::path::Path) -> Result<Self, ConfigError> {
354        let content = std::fs::read_to_string(path)?;
355        let config = toml::from_str(&content)?;
356        Ok(config)
357    }
358
359    /// Get the socket path, using default if not specified
360    pub fn socket_path(&self) -> PathBuf {
361        self.daemon
362            .socket_path
363            .clone()
364            .unwrap_or_else(default_socket_path)
365    }
366}
367
368fn default_socket_path() -> PathBuf {
369    // Honor XDG_RUNTIME_DIR if set (even on macOS)
370    if let Ok(xdg_runtime) = std::env::var("XDG_RUNTIME_DIR") {
371        if !xdg_runtime.is_empty() {
372            return PathBuf::from(xdg_runtime).join("ygrep").join("ygrep.sock");
373        }
374    }
375    if let Some(runtime_dir) = dirs::runtime_dir() {
376        runtime_dir.join("ygrep").join("ygrep.sock")
377    } else if let Some(home) = dirs::home_dir() {
378        home.join(".ygrep").join("ygrep.sock")
379    } else {
380        PathBuf::from("/tmp/ygrep/ygrep.sock")
381    }
382}
383
/// Errors returned when loading a configuration file.
///
/// Both variants wrap their source error via `#[from]`, so `?` converts
/// I/O and TOML deserialization failures into this type automatically.
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    /// The config file could not be read.
    #[error("Failed to read config file: {0}")]
    Io(#[from] std::io::Error),

    /// The file contents could not be deserialized from TOML.
    #[error("Failed to parse config: {0}")]
    Parse(#[from] toml::de::Error),
}
392
#[cfg(test)]
mod tests {
    use super::*;

    /// The built-in defaults must be internally consistent.
    #[test]
    fn test_default_config_sane_values() {
        let Config { indexer, search, .. } = Config::default();

        // Indexer defaults
        assert!(indexer.max_file_size > 0);
        assert!(indexer.chunk_size > 0);
        assert!(indexer.chunk_overlap < indexer.chunk_size);
        assert!(indexer.threads > 0);

        // Search weights should be between 0 and 1
        let unit_interval = 0.0..=1.0;
        assert!(unit_interval.contains(&search.bm25_weight));
        assert!(unit_interval.contains(&search.vector_weight));

        // Limits
        assert!(search.default_limit > 0);
        assert!(search.max_limit >= search.default_limit);
    }

    /// `Config::load()` falls back to the defaults when it finds no file.
    ///
    /// This depends on the environment: it only holds when neither
    /// `.ygrep.toml` in the working directory nor a user-level config file
    /// overrides the two values compared below.
    #[test]
    fn test_config_load_returns_defaults_when_no_file() {
        let loaded = Config::load();
        let expected = Config::default();

        assert_eq!(loaded.indexer.max_file_size, expected.indexer.max_file_size);
        assert_eq!(loaded.search.default_limit, expected.search.default_limit);
    }

    /// Loading from a path that does not exist must surface an error.
    #[test]
    fn test_config_load_from_nonexistent_file() {
        let missing = std::path::Path::new("/nonexistent/config.toml");
        assert!(Config::load_from(missing).is_err());
    }
}