Skip to main content

infiniloom_engine/
config.rs

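//! Configuration file support for Infiniloom.
//!
//! Project settings are read from the first of `.infiniloomrc`, `.infiniloom.yaml`,
//! `.infiniloom.yml`, `.infiniloom.toml`, `.infiniloom.json`, `infiniloom.yaml`,
//! `infiniloom.toml`, or `infiniloom.json` that exists under the given repository path.
//! A global `~/.config/infiniloom/config.yaml` and `INFINILOOM_*` environment variables
//! are then merged on top, in that order.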
5
6use figment::{
7    providers::{Env, Format, Json, Serialized, Toml, Yaml},
8    Figment,
9};
10use serde::{Deserialize, Serialize};
11use std::path::Path;
12use thiserror::Error;
13
14/// Main configuration structure
15#[derive(Debug, Clone, Serialize, Deserialize)]
16#[serde(default)]
17pub struct Config {
18    /// Configuration version (for future compatibility)
19    pub version: u32,
20
21    /// Scanning options
22    pub scan: ScanConfig,
23
24    /// Output options
25    pub output: OutputConfig,
26
27    /// Symbol extraction options
28    pub symbols: SymbolConfig,
29
30    /// Security scanning options
31    pub security: SecurityConfig,
32
33    /// Performance options
34    pub performance: PerformanceConfig,
35
36    /// Include/exclude patterns
37    pub patterns: PatternConfig,
38}
39
40impl Default for Config {
41    fn default() -> Self {
42        Self {
43            version: 1,
44            scan: ScanConfig::default(),
45            output: OutputConfig::default(),
46            symbols: SymbolConfig::default(),
47            security: SecurityConfig::default(),
48            performance: PerformanceConfig::default(),
49            patterns: PatternConfig::default(),
50        }
51    }
52}
53
54/// Scanning configuration
55#[derive(Debug, Clone, Serialize, Deserialize)]
56#[serde(default)]
57pub struct ScanConfig {
58    /// Include patterns (glob syntax)
59    pub include: Vec<String>,
60
61    /// Exclude patterns (glob syntax)
62    pub exclude: Vec<String>,
63
64    /// Maximum file size to include (in bytes, supports "100KB", "1MB" etc)
65    pub max_file_size: String,
66
67    /// Follow symbolic links
68    pub follow_symlinks: bool,
69
70    /// Include hidden files (starting with .)
71    pub include_hidden: bool,
72
73    /// Respect .gitignore files
74    pub respect_gitignore: bool,
75
76    /// Read file contents (false = metadata only)
77    pub read_contents: bool,
78}
79
80impl Default for ScanConfig {
81    fn default() -> Self {
82        Self {
83            include: vec!["**/*".to_owned()],
84            exclude: vec![
85                "**/node_modules/**".to_owned(),
86                "**/.git/**".to_owned(),
87                "**/target/**".to_owned(),
88                "**/__pycache__/**".to_owned(),
89                "**/dist/**".to_owned(),
90                "**/build/**".to_owned(),
91                "**/.venv/**".to_owned(),
92                "**/venv/**".to_owned(),
93                "**/*.min.js".to_owned(),
94                "**/*.min.css".to_owned(),
95            ],
96            max_file_size: "10MB".to_owned(),
97            follow_symlinks: false,
98            include_hidden: false,
99            respect_gitignore: true,
100            read_contents: true,
101        }
102    }
103}
104
105impl ScanConfig {
106    /// Parse max_file_size string to bytes
107    pub fn max_file_size_bytes(&self) -> u64 {
108        parse_size(&self.max_file_size).unwrap_or(10 * 1024 * 1024)
109    }
110}
111
112/// Output configuration
113#[derive(Debug, Clone, Serialize, Deserialize)]
114#[serde(default)]
115pub struct OutputConfig {
116    /// Output format: xml, markdown, json, yaml
117    pub format: String,
118
119    /// Target LLM model: claude, gpt4o, gpt4, gemini, llama
120    pub model: String,
121
122    /// Compression level: none, minimal, balanced, aggressive, extreme
123    pub compression: String,
124
125    /// Maximum token budget (0 = unlimited)
126    pub token_budget: u32,
127
128    /// Include line numbers in code output
129    pub line_numbers: bool,
130
131    /// Optimize output for prompt caching (Claude)
132    pub cache_optimized: bool,
133
134    /// Output file path (- for stdout)
135    pub output_file: String,
136
137    /// Custom header text to include at the top of output
138    pub header_text: Option<String>,
139
140    /// Path to file containing custom instructions to include
141    pub instruction_file: Option<String>,
142
143    /// Copy output to clipboard after generation
144    pub copy_to_clipboard: bool,
145
146    /// Show token count tree/breakdown in output
147    pub show_token_tree: bool,
148
149    /// Include directory structure in output
150    pub show_directory_structure: bool,
151
152    /// Include file summary section
153    pub show_file_summary: bool,
154
155    /// Remove empty lines from code
156    pub remove_empty_lines: bool,
157
158    /// Remove comments from code
159    pub remove_comments: bool,
160
161    /// Number of top files to show in summary (0 = all)
162    pub top_files_length: usize,
163
164    /// Include empty directories in structure
165    pub include_empty_directories: bool,
166}
167
168impl Default for OutputConfig {
169    fn default() -> Self {
170        Self {
171            format: "xml".to_owned(),
172            model: "claude".to_owned(),
173            compression: "none".to_owned(),
174            token_budget: 0,
175            line_numbers: true,
176            cache_optimized: true,
177            output_file: "-".to_owned(),
178            header_text: None,
179            instruction_file: None,
180            copy_to_clipboard: false,
181            show_token_tree: false,
182            show_directory_structure: true,
183            show_file_summary: true,
184            remove_empty_lines: false,
185            remove_comments: false,
186            top_files_length: 0,
187            include_empty_directories: false,
188        }
189    }
190}
191
192/// Symbol extraction configuration
193#[derive(Debug, Clone, Serialize, Deserialize)]
194#[serde(default)]
195pub struct SymbolConfig {
196    /// Enable symbol extraction
197    pub enabled: bool,
198
199    /// Languages to parse (empty = all supported)
200    pub languages: Vec<String>,
201
202    /// Extract docstrings/documentation
203    pub extract_docstrings: bool,
204
205    /// Extract function/method signatures
206    pub extract_signatures: bool,
207
208    /// Maximum number of symbols to include in repomap
209    pub max_symbols: usize,
210
211    /// Include imports in symbol graph
212    pub include_imports: bool,
213
214    /// Build dependency graph
215    pub build_dependency_graph: bool,
216}
217
218impl Default for SymbolConfig {
219    fn default() -> Self {
220        Self {
221            enabled: true,
222            languages: vec![],
223            extract_docstrings: true,
224            extract_signatures: true,
225            max_symbols: 100,
226            include_imports: true,
227            build_dependency_graph: true,
228        }
229    }
230}
231
232/// Security scanning configuration
233#[derive(Debug, Clone, Serialize, Deserialize)]
234#[serde(default)]
235pub struct SecurityConfig {
236    /// Enable secret scanning
237    pub scan_secrets: bool,
238
239    /// Fail on secrets detected
240    pub fail_on_secrets: bool,
241
242    /// Patterns to allowlist (won't be flagged)
243    pub allowlist: Vec<String>,
244
245    /// Additional secret patterns (regex)
246    pub custom_patterns: Vec<String>,
247
248    /// Redact secrets in output
249    pub redact_secrets: bool,
250}
251
252impl Default for SecurityConfig {
253    fn default() -> Self {
254        Self {
255            scan_secrets: true,
256            fail_on_secrets: false,
257            allowlist: vec![],
258            custom_patterns: vec![],
259            redact_secrets: true,
260        }
261    }
262}
263
264/// Performance configuration
265#[derive(Debug, Clone, Serialize, Deserialize)]
266#[serde(default)]
267pub struct PerformanceConfig {
268    /// Number of threads (0 = auto)
269    pub threads: usize,
270
271    /// Enable incremental mode (cache results)
272    pub incremental: bool,
273
274    /// Cache directory
275    pub cache_dir: String,
276
277    /// Use memory-mapped I/O for large files
278    pub memory_mapped: bool,
279
280    /// Skip symbol extraction for faster scanning
281    pub skip_symbols: bool,
282
283    /// Maximum files to process in a single parallel batch
284    /// Prevents stack overflow on very large repos (75K+ files)
285    /// Default: 5000
286    pub batch_size: usize,
287}
288
289impl Default for PerformanceConfig {
290    fn default() -> Self {
291        Self {
292            threads: 0, // auto
293            incremental: false,
294            cache_dir: ".infiniloom/cache".to_owned(),
295            memory_mapped: true,
296            skip_symbols: false,
297            batch_size: 5000, // Default from scanner::DEFAULT_BATCH_SIZE
298        }
299    }
300}
301
302/// Pattern configuration for filtering
303#[derive(Debug, Clone, Serialize, Deserialize)]
304#[serde(default)]
305pub struct PatternConfig {
306    /// File extensions to include (empty = all)
307    pub extensions: Vec<String>,
308
309    /// Paths to always include (high priority)
310    pub priority_paths: Vec<String>,
311
312    /// Paths to always exclude (even if matched by include)
313    pub ignore_paths: Vec<String>,
314
315    /// Only include files modified after this git ref
316    pub modified_since: Option<String>,
317
318    /// Only include files by specific author
319    pub by_author: Option<String>,
320}
321
322impl Default for PatternConfig {
323    fn default() -> Self {
324        Self {
325            extensions: vec![],
326            priority_paths: vec![
327                "README.md".to_owned(),
328                "package.json".to_owned(),
329                "Cargo.toml".to_owned(),
330                "pyproject.toml".to_owned(),
331            ],
332            ignore_paths: vec!["*.lock".to_owned(), "*.sum".to_owned()],
333            modified_since: None,
334            by_author: None,
335        }
336    }
337}
338
339impl Config {
340    /// Load configuration from default locations
341    #[allow(clippy::result_large_err)]
342    pub fn load(repo_path: &Path) -> Result<Self, ConfigError> {
343        Self::load_with_profile(repo_path, None)
344    }
345
346    /// Load configuration with optional profile override
347    #[allow(clippy::result_large_err)]
348    pub fn load_with_profile(repo_path: &Path, profile: Option<&str>) -> Result<Self, ConfigError> {
349        let mut figment = Figment::new().merge(Serialized::defaults(Config::default()));
350
351        // Try loading from various config file locations
352        let config_files = [
353            repo_path.join(".infiniloomrc"),
354            repo_path.join(".infiniloom.yaml"),
355            repo_path.join(".infiniloom.yml"),
356            repo_path.join(".infiniloom.toml"),
357            repo_path.join(".infiniloom.json"),
358            repo_path.join("infiniloom.yaml"),
359            repo_path.join("infiniloom.toml"),
360            repo_path.join("infiniloom.json"),
361        ];
362
363        for config_file in &config_files {
364            if config_file.exists() {
365                figment = match config_file.extension().and_then(|e| e.to_str()) {
366                    Some("yaml") | Some("yml") => figment.merge(Yaml::file(config_file)),
367                    Some("toml") => figment.merge(Toml::file(config_file)),
368                    Some("json") => figment.merge(Json::file(config_file)),
369                    None => {
370                        // .infiniloomrc - try YAML first, then TOML
371                        if let Ok(content) = std::fs::read_to_string(config_file) {
372                            if content.trim_start().starts_with('{') {
373                                figment.merge(Json::file(config_file))
374                            } else if content.contains(':') {
375                                figment.merge(Yaml::file(config_file))
376                            } else {
377                                figment.merge(Toml::file(config_file))
378                            }
379                        } else {
380                            figment
381                        }
382                    },
383                    _ => figment,
384                };
385                break; // Use first found config file
386            }
387        }
388
389        // Check home directory for global config
390        if let Some(home) = dirs::home_dir() {
391            let global_config = home.join(".config/infiniloom/config.yaml");
392            if global_config.exists() {
393                figment = figment.merge(Yaml::file(global_config));
394            }
395        }
396
397        // Environment variable overrides (INFINILOOM_*)
398        figment = figment.merge(Env::prefixed("INFINILOOM_").split("__"));
399
400        // Apply profile if specified
401        if let Some(profile_name) = profile {
402            figment = figment.select(profile_name);
403        }
404
405        figment.extract().map_err(ConfigError::ParseError)
406    }
407
408    /// Save configuration to a file
409    #[allow(clippy::result_large_err)]
410    pub fn save(&self, path: &Path) -> Result<(), ConfigError> {
411        let content = match path.extension().and_then(|e| e.to_str()) {
412            Some("json") => serde_json::to_string_pretty(self)
413                .map_err(|e| ConfigError::SerializeError(e.to_string()))?,
414            Some("toml") => toml::to_string_pretty(self)
415                .map_err(|e| ConfigError::SerializeError(e.to_string()))?,
416            _ => serde_yaml::to_string(self)
417                .map_err(|e| ConfigError::SerializeError(e.to_string()))?,
418        };
419
420        std::fs::write(path, content).map_err(ConfigError::IoError)
421    }
422
423    /// Generate a default configuration file
424    /// Only includes fields that the CLI actually reads to avoid misleading users
425    pub fn generate_default(format: &str) -> String {
426        // Use a minimal config with only fields the CLI actually uses
427        // This avoids confusion from fields that appear in config but are silently ignored
428        let minimal = MinimalConfig::default();
429        match format {
430            "json" => serde_json::to_string_pretty(&minimal).unwrap_or_default(),
431            "toml" => toml::to_string_pretty(&minimal).unwrap_or_default(),
432            _ => serde_yaml::to_string(&minimal).unwrap_or_default(),
433        }
434    }
435}
436
437/// Minimal configuration with only fields the CLI actually uses
438/// This prevents user confusion from config fields that are silently ignored
439#[derive(Debug, Clone, Serialize, Deserialize)]
440struct MinimalConfig {
441    /// Output settings
442    output: MinimalOutputConfig,
443    /// Scan settings
444    scan: MinimalScanConfig,
445    /// Security settings
446    security: MinimalSecurityConfig,
447    /// Include test files
448    #[serde(skip_serializing_if = "is_false")]
449    include_tests: bool,
450    /// Include documentation files
451    #[serde(skip_serializing_if = "is_false")]
452    include_docs: bool,
453}
454
/// Returns `true` when the referenced flag is `false`.
///
/// Used as a serde `skip_serializing_if` predicate, which is why it takes a reference.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}
458
459#[derive(Debug, Clone, Serialize, Deserialize)]
460struct MinimalOutputConfig {
461    /// Output format: xml, markdown, json
462    format: String,
463    /// Target model: claude, gpt4o, gpt4, gemini, llama
464    model: String,
465    /// Compression: none, minimal, balanced, aggressive, extreme, semantic
466    compression: String,
467    /// Token budget (0 = unlimited)
468    token_budget: u32,
469    /// Show line numbers in output
470    line_numbers: bool,
471    /// Include directory structure
472    show_directory_structure: bool,
473    /// Include file summary
474    show_file_summary: bool,
475}
476
477#[derive(Debug, Clone, Serialize, Deserialize)]
478struct MinimalScanConfig {
479    /// Include glob patterns
480    include: Vec<String>,
481    /// Exclude glob patterns
482    exclude: Vec<String>,
483}
484
485#[derive(Debug, Clone, Serialize, Deserialize)]
486struct MinimalSecurityConfig {
487    /// Enable secret scanning
488    scan_secrets: bool,
489    /// Fail if secrets detected
490    fail_on_secrets: bool,
491    /// Redact secrets in output
492    redact_secrets: bool,
493    /// Patterns to allowlist
494    #[serde(skip_serializing_if = "Vec::is_empty")]
495    allowlist: Vec<String>,
496    /// Custom secret patterns (regex)
497    #[serde(skip_serializing_if = "Vec::is_empty")]
498    custom_patterns: Vec<String>,
499}
500
501impl Default for MinimalConfig {
502    fn default() -> Self {
503        Self {
504            output: MinimalOutputConfig {
505                format: "xml".to_owned(),
506                model: "claude".to_owned(),
507                compression: "balanced".to_owned(),
508                token_budget: 0,
509                line_numbers: true,
510                show_directory_structure: true,
511                show_file_summary: true,
512            },
513            scan: MinimalScanConfig {
514                include: vec![],
515                exclude: vec![
516                    "**/node_modules/**".to_owned(),
517                    "**/.git/**".to_owned(),
518                    "**/target/**".to_owned(),
519                    "**/__pycache__/**".to_owned(),
520                    "**/dist/**".to_owned(),
521                    "**/build/**".to_owned(),
522                ],
523            },
524            security: MinimalSecurityConfig {
525                scan_secrets: true,
526                fail_on_secrets: false,
527                redact_secrets: true,
528                allowlist: vec![],
529                custom_patterns: vec![],
530            },
531            include_tests: false,
532            include_docs: false,
533        }
534    }
535}
536
537impl Config {
538    /// Get the effective number of threads
539    pub fn effective_threads(&self) -> usize {
540        if self.performance.threads == 0 {
541            std::thread::available_parallelism()
542                .map(|p| p.get())
543                .unwrap_or(4)
544        } else {
545            self.performance.threads
546        }
547    }
548}
549
550/// Configuration errors
551#[derive(Debug, Error)]
552pub enum ConfigError {
553    #[error("Configuration parse error: {0}")]
554    ParseError(#[from] figment::Error),
555    #[error("Configuration I/O error: {0}")]
556    IoError(#[from] std::io::Error),
557    #[error("Configuration serialize error: {0}")]
558    SerializeError(String),
559}
560
/// Parses a size string such as `"100KB"`, `"1MB"`, or `"500"` into bytes.
///
/// The input is trimmed and matched case-insensitively. A plain integer is
/// taken as a byte count. Otherwise exactly one unit suffix is accepted:
/// `KB`/`K` (×1024), `MB`/`M` (×1024²), `GB`/`G` (×1024³), or `B` (×1).
/// Whitespace between the number and the suffix is allowed.
///
/// Returns `None` when the suffix is unknown, the numeric part is not a valid
/// `u64`, or the result would not fit in a `u64`.
fn parse_size(s: &str) -> Option<u64> {
    const KIB: u64 = 1024;
    // Two-letter suffixes come first so that "KB" is not mistaken for a bare "B".
    const SUFFIXES: [(&str, u64); 7] = [
        ("KB", KIB),
        ("MB", KIB * KIB),
        ("GB", KIB * KIB * KIB),
        ("K", KIB),
        ("M", KIB * KIB),
        ("G", KIB * KIB * KIB),
        ("B", 1),
    ];

    let s = s.trim().to_uppercase();

    // A plain number is already a byte count.
    if let Ok(n) = s.parse::<u64>() {
        return Some(n);
    }

    // Strip exactly one suffix; repeated suffixes such as "1KKB" are rejected.
    let (digits, multiplier) = SUFFIXES
        .iter()
        .find_map(|&(suffix, multiplier)| s.strip_suffix(suffix).map(|rest| (rest, multiplier)))?;

    // Checked multiplication reports an out-of-range size as `None`
    // rather than panicking or wrapping.
    digits.trim().parse::<u64>().ok()?.checked_mul(multiplier)
}
585
// Provide dirs crate functionality inline if not available
mod dirs {
    use std::path::PathBuf;

    /// Returns the user's home directory taken from the environment.
    ///
    /// `HOME` is consulted first, then `USERPROFILE`. A variable that is unset
    /// or set to an empty string is skipped, so an empty `HOME` does not
    /// produce an empty path that would later resolve relative to the current
    /// directory. Returns `None` when neither variable has a non-empty value.
    pub(super) fn home_dir() -> Option<PathBuf> {
        ["HOME", "USERPROFILE"]
            .iter()
            .filter_map(|name| std::env::var_os(*name))
            .find(|value| !value.is_empty())
            .map(PathBuf::from)
    }
}
596
#[cfg(test)]
mod tests {
    use super::*;

    /// The baseline configuration exposes the expected version, gitignore and format values.
    #[test]
    fn test_default_config() {
        let Config {
            version,
            scan,
            output,
            ..
        } = Config::default();

        assert_eq!(version, 1);
        assert!(scan.respect_gitignore);
        assert_eq!(output.format, "xml");
    }

    /// `parse_size` handles plain numbers, each unit suffix, and invalid text.
    #[test]
    fn test_parse_size() {
        let cases: [(&str, Option<u64>); 7] = [
            ("100", Some(100)),
            ("100B", Some(100)),
            ("1KB", Some(1024)),
            ("1K", Some(1024)),
            ("10MB", Some(10 * 1024 * 1024)),
            ("1GB", Some(1024 * 1024 * 1024)),
            ("invalid", None),
        ];

        for &(input, expected) in &cases {
            assert_eq!(parse_size(input), expected);
        }
    }

    /// The YAML starter config contains the output and scan sections.
    #[test]
    fn test_generate_default_yaml() {
        let rendered = Config::generate_default("yaml");
        for needle in &["output:", "scan:", "format:"] {
            assert!(rendered.contains(needle));
        }
    }

    /// The TOML starter config contains the output and scan tables.
    #[test]
    fn test_generate_default_toml() {
        let rendered = Config::generate_default("toml");
        for needle in &["[output]", "[scan]"] {
            assert!(rendered.contains(needle));
        }
    }

    /// The JSON starter config contains the output and scan keys.
    #[test]
    fn test_generate_default_json() {
        let rendered = Config::generate_default("json");
        for needle in &["\"output\"", "\"scan\""] {
            assert!(rendered.contains(needle));
        }
    }

    /// Zero threads resolves to a positive count; an explicit count is kept.
    #[test]
    fn test_effective_threads() {
        let mut config = Config::default();

        config.performance.threads = 0;
        assert!(config.effective_threads() > 0);

        config.performance.threads = 8;
        assert_eq!(config.effective_threads(), 8);
    }
}