infiniloom_engine/
config.rs

1//! Configuration file support for Infiniloom
2//!
//! Supports a project-level `.infiniloomrc`, `.infiniloom.{yaml,yml,toml,json}` or
//! `infiniloom.{yaml,toml,json}` file, plus a global `~/.config/infiniloom/config.yaml`,
//! with `INFINILOOM_*` environment variable override support.
5
6use figment::{
7    providers::{Env, Format, Json, Serialized, Toml, Yaml},
8    Figment,
9};
10use serde::{Deserialize, Serialize};
11use std::path::Path;
12use thiserror::Error;
13
/// Main configuration structure
///
/// Groups every configuration section into one value. The container-level
/// `#[serde(default)]` means any section missing from the deserialized input
/// is filled in from [`Config::default`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
    /// Configuration version (for future compatibility)
    pub version: u32,

    /// Scanning options
    pub scan: ScanConfig,

    /// Output options
    pub output: OutputConfig,

    /// Symbol extraction options
    pub symbols: SymbolConfig,

    /// Security scanning options
    pub security: SecurityConfig,

    /// Performance options
    pub performance: PerformanceConfig,

    /// Include/exclude patterns
    pub patterns: PatternConfig,
}
39
40impl Default for Config {
41    fn default() -> Self {
42        Self {
43            version: 1,
44            scan: ScanConfig::default(),
45            output: OutputConfig::default(),
46            symbols: SymbolConfig::default(),
47            security: SecurityConfig::default(),
48            performance: PerformanceConfig::default(),
49            patterns: PatternConfig::default(),
50        }
51    }
52}
53
/// Scanning configuration
///
/// Fields missing from the deserialized input fall back to
/// [`ScanConfig::default`] because of the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ScanConfig {
    /// Include patterns (glob syntax)
    pub include: Vec<String>,

    /// Exclude patterns (glob syntax)
    pub exclude: Vec<String>,

    /// Maximum file size to include, kept as text: either a plain byte count
    /// or a number with a size suffix ("100KB", "1MB" etc).
    /// Use `max_file_size_bytes` to obtain the numeric value.
    pub max_file_size: String,

    /// Follow symbolic links
    pub follow_symlinks: bool,

    /// Include hidden files (starting with .)
    pub include_hidden: bool,

    /// Respect .gitignore files
    pub respect_gitignore: bool,

    /// Read file contents (false = metadata only)
    pub read_contents: bool,
}
79
80impl Default for ScanConfig {
81    fn default() -> Self {
82        Self {
83            include: vec!["**/*".to_owned()],
84            exclude: vec![
85                "**/node_modules/**".to_owned(),
86                "**/.git/**".to_owned(),
87                "**/target/**".to_owned(),
88                "**/__pycache__/**".to_owned(),
89                "**/dist/**".to_owned(),
90                "**/build/**".to_owned(),
91                "**/.venv/**".to_owned(),
92                "**/venv/**".to_owned(),
93                "**/*.min.js".to_owned(),
94                "**/*.min.css".to_owned(),
95            ],
96            max_file_size: "10MB".to_owned(),
97            follow_symlinks: false,
98            include_hidden: false,
99            respect_gitignore: true,
100            read_contents: true,
101        }
102    }
103}
104
105impl ScanConfig {
106    /// Parse max_file_size string to bytes
107    pub fn max_file_size_bytes(&self) -> u64 {
108        parse_size(&self.max_file_size).unwrap_or(10 * 1024 * 1024)
109    }
110}
111
/// Output configuration
///
/// Fields missing from the deserialized input fall back to
/// [`OutputConfig::default`] because of the container-level `#[serde(default)]`.
/// The `format`, `model` and `compression` fields are plain strings; nothing in
/// this struct restricts them to the values listed in their descriptions.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct OutputConfig {
    /// Output format: xml, markdown, json, yaml
    pub format: String,

    /// Target LLM model: claude, gpt4o, gpt4, gemini, llama
    pub model: String,

    /// Compression level: none, minimal, balanced, aggressive, extreme
    pub compression: String,

    /// Maximum token budget (0 = unlimited)
    pub token_budget: u32,

    /// Include line numbers in code output
    pub line_numbers: bool,

    /// Optimize output for prompt caching (Claude)
    pub cache_optimized: bool,

    /// Output file path (- for stdout)
    pub output_file: String,

    /// Custom header text to include at the top of output
    pub header_text: Option<String>,

    /// Path to file containing custom instructions to include
    pub instruction_file: Option<String>,

    /// Copy output to clipboard after generation
    pub copy_to_clipboard: bool,

    /// Show token count tree/breakdown in output
    pub show_token_tree: bool,

    /// Include directory structure in output
    pub show_directory_structure: bool,

    /// Include file summary section
    pub show_file_summary: bool,

    /// Remove empty lines from code
    pub remove_empty_lines: bool,

    /// Remove comments from code
    pub remove_comments: bool,

    /// Number of top files to show in summary (0 = all)
    pub top_files_length: usize,

    /// Include empty directories in structure
    pub include_empty_directories: bool,
}
167
168impl Default for OutputConfig {
169    fn default() -> Self {
170        Self {
171            format: "xml".to_owned(),
172            model: "claude".to_owned(),
173            compression: "none".to_owned(),
174            token_budget: 0,
175            line_numbers: true,
176            cache_optimized: true,
177            output_file: "-".to_owned(),
178            header_text: None,
179            instruction_file: None,
180            copy_to_clipboard: false,
181            show_token_tree: false,
182            show_directory_structure: true,
183            show_file_summary: true,
184            remove_empty_lines: false,
185            remove_comments: false,
186            top_files_length: 0,
187            include_empty_directories: false,
188        }
189    }
190}
191
/// Symbol extraction configuration
///
/// Fields missing from the deserialized input fall back to
/// [`SymbolConfig::default`] because of the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct SymbolConfig {
    /// Enable symbol extraction
    pub enabled: bool,

    /// Languages to parse (empty = all supported)
    pub languages: Vec<String>,

    /// Extract docstrings/documentation
    pub extract_docstrings: bool,

    /// Extract function/method signatures
    pub extract_signatures: bool,

    /// Maximum number of symbols to include in repomap
    pub max_symbols: usize,

    /// Include imports in symbol graph
    pub include_imports: bool,

    /// Build dependency graph
    pub build_dependency_graph: bool,
}
217
218impl Default for SymbolConfig {
219    fn default() -> Self {
220        Self {
221            enabled: true,
222            languages: vec![],
223            extract_docstrings: true,
224            extract_signatures: true,
225            max_symbols: 100,
226            include_imports: true,
227            build_dependency_graph: true,
228        }
229    }
230}
231
/// Security scanning configuration
///
/// Fields missing from the deserialized input fall back to
/// [`SecurityConfig::default`] because of the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct SecurityConfig {
    /// Enable secret scanning
    pub scan_secrets: bool,

    /// Fail on secrets detected
    pub fail_on_secrets: bool,

    /// Patterns to allowlist (won't be flagged)
    pub allowlist: Vec<String>,

    /// Additional secret patterns (regex)
    pub custom_patterns: Vec<String>,

    /// Redact secrets in output
    pub redact_secrets: bool,
}
251
252impl Default for SecurityConfig {
253    fn default() -> Self {
254        Self {
255            scan_secrets: true,
256            fail_on_secrets: false,
257            allowlist: vec![],
258            custom_patterns: vec![],
259            redact_secrets: true,
260        }
261    }
262}
263
/// Performance configuration
///
/// Fields missing from the deserialized input fall back to
/// [`PerformanceConfig::default`] because of the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct PerformanceConfig {
    /// Number of threads (0 = auto); see `Config::effective_threads` for how
    /// the automatic value is resolved.
    pub threads: usize,

    /// Enable incremental mode (cache results)
    pub incremental: bool,

    /// Cache directory
    pub cache_dir: String,

    /// Use memory-mapped I/O for large files
    pub memory_mapped: bool,

    /// Skip symbol extraction for faster scanning
    pub skip_symbols: bool,
}
283
284impl Default for PerformanceConfig {
285    fn default() -> Self {
286        Self {
287            threads: 0, // auto
288            incremental: false,
289            cache_dir: ".infiniloom/cache".to_owned(),
290            memory_mapped: true,
291            skip_symbols: false,
292        }
293    }
294}
295
/// Pattern configuration for filtering
///
/// Fields missing from the deserialized input fall back to
/// [`PatternConfig::default`] because of the container-level `#[serde(default)]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct PatternConfig {
    /// File extensions to include (empty = all)
    pub extensions: Vec<String>,

    /// Paths to always include (high priority)
    pub priority_paths: Vec<String>,

    /// Paths to always exclude (even if matched by include)
    pub ignore_paths: Vec<String>,

    /// Only include files modified after this git ref
    pub modified_since: Option<String>,

    /// Only include files by specific author
    pub by_author: Option<String>,
}
315
316impl Default for PatternConfig {
317    fn default() -> Self {
318        Self {
319            extensions: vec![],
320            priority_paths: vec![
321                "README.md".to_owned(),
322                "package.json".to_owned(),
323                "Cargo.toml".to_owned(),
324                "pyproject.toml".to_owned(),
325            ],
326            ignore_paths: vec!["*.lock".to_owned(), "*.sum".to_owned()],
327            modified_since: None,
328            by_author: None,
329        }
330    }
331}
332
333impl Config {
334    /// Load configuration from default locations
335    #[allow(clippy::result_large_err)]
336    pub fn load(repo_path: &Path) -> Result<Self, ConfigError> {
337        Self::load_with_profile(repo_path, None)
338    }
339
340    /// Load configuration with optional profile override
341    #[allow(clippy::result_large_err)]
342    pub fn load_with_profile(repo_path: &Path, profile: Option<&str>) -> Result<Self, ConfigError> {
343        let mut figment = Figment::new().merge(Serialized::defaults(Config::default()));
344
345        // Try loading from various config file locations
346        let config_files = [
347            repo_path.join(".infiniloomrc"),
348            repo_path.join(".infiniloom.yaml"),
349            repo_path.join(".infiniloom.yml"),
350            repo_path.join(".infiniloom.toml"),
351            repo_path.join(".infiniloom.json"),
352            repo_path.join("infiniloom.yaml"),
353            repo_path.join("infiniloom.toml"),
354            repo_path.join("infiniloom.json"),
355        ];
356
357        for config_file in &config_files {
358            if config_file.exists() {
359                figment = match config_file.extension().and_then(|e| e.to_str()) {
360                    Some("yaml") | Some("yml") => figment.merge(Yaml::file(config_file)),
361                    Some("toml") => figment.merge(Toml::file(config_file)),
362                    Some("json") => figment.merge(Json::file(config_file)),
363                    None => {
364                        // .infiniloomrc - try YAML first, then TOML
365                        if let Ok(content) = std::fs::read_to_string(config_file) {
366                            if content.trim_start().starts_with('{') {
367                                figment.merge(Json::file(config_file))
368                            } else if content.contains(':') {
369                                figment.merge(Yaml::file(config_file))
370                            } else {
371                                figment.merge(Toml::file(config_file))
372                            }
373                        } else {
374                            figment
375                        }
376                    },
377                    _ => figment,
378                };
379                break; // Use first found config file
380            }
381        }
382
383        // Check home directory for global config
384        if let Some(home) = dirs::home_dir() {
385            let global_config = home.join(".config/infiniloom/config.yaml");
386            if global_config.exists() {
387                figment = figment.merge(Yaml::file(global_config));
388            }
389        }
390
391        // Environment variable overrides (INFINILOOM_*)
392        figment = figment.merge(Env::prefixed("INFINILOOM_").split("__"));
393
394        // Apply profile if specified
395        if let Some(profile_name) = profile {
396            figment = figment.select(profile_name);
397        }
398
399        figment.extract().map_err(ConfigError::ParseError)
400    }
401
402    /// Save configuration to a file
403    #[allow(clippy::result_large_err)]
404    pub fn save(&self, path: &Path) -> Result<(), ConfigError> {
405        let content = match path.extension().and_then(|e| e.to_str()) {
406            Some("json") => serde_json::to_string_pretty(self)
407                .map_err(|e| ConfigError::SerializeError(e.to_string()))?,
408            Some("toml") => toml::to_string_pretty(self)
409                .map_err(|e| ConfigError::SerializeError(e.to_string()))?,
410            _ => serde_yaml::to_string(self)
411                .map_err(|e| ConfigError::SerializeError(e.to_string()))?,
412        };
413
414        std::fs::write(path, content).map_err(ConfigError::IoError)
415    }
416
417    /// Generate a default configuration file
418    /// Only includes fields that the CLI actually reads to avoid misleading users
419    pub fn generate_default(format: &str) -> String {
420        // Use a minimal config with only fields the CLI actually uses
421        // This avoids confusion from fields that appear in config but are silently ignored
422        let minimal = MinimalConfig::default();
423        match format {
424            "json" => serde_json::to_string_pretty(&minimal).unwrap_or_default(),
425            "toml" => toml::to_string_pretty(&minimal).unwrap_or_default(),
426            _ => serde_yaml::to_string(&minimal).unwrap_or_default(),
427        }
428    }
429}
430
/// Minimal configuration with only fields the CLI actually uses
/// This prevents user confusion from config fields that are silently ignored
///
/// Serialized by `Config::generate_default` to produce the starter config file.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct MinimalConfig {
    /// Output settings
    output: MinimalOutputConfig,
    /// Scan settings
    scan: MinimalScanConfig,
    /// Security settings
    security: MinimalSecurityConfig,
    /// Include test files (left out of the serialized output while `false`)
    #[serde(skip_serializing_if = "is_false")]
    include_tests: bool,
    /// Include documentation files (left out of the serialized output while `false`)
    #[serde(skip_serializing_if = "is_false")]
    include_docs: bool,
}
448
/// Serde `skip_serializing_if` predicate: `true` exactly when the flag is `false`.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}
452
/// Output section of the generated starter config (see `MinimalConfig`).
///
/// NOTE(review): the compression list below mentions "semantic", which the
/// `OutputConfig::compression` description does not — confirm which is current.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct MinimalOutputConfig {
    /// Output format: xml, markdown, json
    format: String,
    /// Target model: claude, gpt4o, gpt4, gemini, llama
    model: String,
    /// Compression: none, minimal, balanced, aggressive, extreme, semantic
    compression: String,
    /// Token budget (0 = unlimited)
    token_budget: u32,
    /// Show line numbers in output
    line_numbers: bool,
    /// Include directory structure
    show_directory_structure: bool,
    /// Include file summary
    show_file_summary: bool,
}
470
/// Scan section of the generated starter config (see `MinimalConfig`).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct MinimalScanConfig {
    /// Include glob patterns
    include: Vec<String>,
    /// Exclude glob patterns
    exclude: Vec<String>,
}
478
/// Security section of the generated starter config (see `MinimalConfig`).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct MinimalSecurityConfig {
    /// Enable secret scanning
    scan_secrets: bool,
    /// Fail if secrets detected
    fail_on_secrets: bool,
    /// Redact secrets in output
    redact_secrets: bool,
    /// Patterns to allowlist (left out of the serialized output while empty)
    #[serde(skip_serializing_if = "Vec::is_empty")]
    allowlist: Vec<String>,
    /// Custom secret patterns (regex; left out of the serialized output while empty)
    #[serde(skip_serializing_if = "Vec::is_empty")]
    custom_patterns: Vec<String>,
}
494
495impl Default for MinimalConfig {
496    fn default() -> Self {
497        Self {
498            output: MinimalOutputConfig {
499                format: "xml".to_owned(),
500                model: "claude".to_owned(),
501                compression: "balanced".to_owned(),
502                token_budget: 0,
503                line_numbers: true,
504                show_directory_structure: true,
505                show_file_summary: true,
506            },
507            scan: MinimalScanConfig {
508                include: vec![],
509                exclude: vec![
510                    "**/node_modules/**".to_owned(),
511                    "**/.git/**".to_owned(),
512                    "**/target/**".to_owned(),
513                    "**/__pycache__/**".to_owned(),
514                    "**/dist/**".to_owned(),
515                    "**/build/**".to_owned(),
516                ],
517            },
518            security: MinimalSecurityConfig {
519                scan_secrets: true,
520                fail_on_secrets: false,
521                redact_secrets: true,
522                allowlist: vec![],
523                custom_patterns: vec![],
524            },
525            include_tests: false,
526            include_docs: false,
527        }
528    }
529}
530
531impl Config {
532    /// Get the effective number of threads
533    pub fn effective_threads(&self) -> usize {
534        if self.performance.threads == 0 {
535            std::thread::available_parallelism()
536                .map(|p| p.get())
537                .unwrap_or(4)
538        } else {
539            self.performance.threads
540        }
541    }
542}
543
/// Configuration errors
///
/// Errors produced while loading or saving a `Config`.
#[derive(Debug, Error)]
pub enum ConfigError {
    /// Wraps a `figment::Error`; returned by `Config::load_with_profile` when
    /// the merged configuration cannot be extracted.
    #[error("Configuration parse error: {0}")]
    ParseError(#[from] figment::Error),
    /// Wraps a `std::io::Error`; returned by `Config::save` when the file
    /// cannot be written.
    #[error("Configuration I/O error: {0}")]
    IoError(#[from] std::io::Error),
    /// Serialization failed in `Config::save`; carries the serializer's error
    /// message as text.
    #[error("Configuration serialize error: {0}")]
    SerializeError(String),
}
554
/// Parse a size string like "100KB", "1MB", "500" into bytes
///
/// Matching is case-insensitive and surrounding whitespace is ignored.
/// Accepted forms are a plain integer (bytes) or an integer followed by
/// exactly one suffix: `K`/`KB` (x1024), `M`/`MB` (x1024^2), `G`/`GB`
/// (x1024^3) or `B` (x1).
///
/// Returns `None` when the suffix is unknown, the numeric part is not a valid
/// `u64`, a suffix is repeated (e.g. "1KBKB"), or the result would overflow `u64`.
fn parse_size(s: &str) -> Option<u64> {
    const KIB: u64 = 1024;
    // Two-letter suffixes come first so that "KB" is not mistaken for a bare "B".
    const SUFFIXES: [(&str, u64); 7] = [
        ("KB", KIB),
        ("MB", KIB * KIB),
        ("GB", KIB * KIB * KIB),
        ("K", KIB),
        ("M", KIB * KIB),
        ("G", KIB * KIB * KIB),
        ("B", 1),
    ];

    let s = s.trim().to_uppercase();

    // Try parsing as plain number first
    if let Ok(n) = s.parse::<u64>() {
        return Some(n);
    }

    // `strip_suffix` removes the suffix exactly once, so malformed input such
    // as "1KK" is rejected instead of being silently read as "1K".
    let (num_str, multiplier) = SUFFIXES
        .iter()
        .find_map(|&(suffix, multiplier)| s.strip_suffix(suffix).map(|rest| (rest, multiplier)))?;

    // `checked_mul` turns an overflowing size into `None` rather than a panic
    // (debug builds) or a silently wrapped value (release builds).
    num_str.trim().parse::<u64>().ok()?.checked_mul(multiplier)
}
579
// Provide dirs crate functionality inline if not available
mod dirs {
    use std::path::PathBuf;

    /// Best-effort lookup of the current user's home directory.
    ///
    /// Reads `HOME` first and falls back to `USERPROFILE`. A variable that is
    /// set but empty is treated as unset: otherwise an empty `HOME` would
    /// produce an empty path, and paths joined onto it would resolve relative
    /// to the current working directory.
    ///
    /// Returns `None` when neither variable holds a non-empty value.
    pub(super) fn home_dir() -> Option<PathBuf> {
        std::env::var_os("HOME")
            .filter(|value| !value.is_empty())
            .or_else(|| std::env::var_os("USERPROFILE").filter(|value| !value.is_empty()))
            .map(PathBuf::from)
    }
}
590
#[cfg(test)]
mod tests {
    use super::*;

    /// The built-in defaults report version 1, honour `.gitignore` and emit XML.
    #[test]
    fn test_default_config() {
        let Config { version, scan, output, .. } = Config::default();

        assert_eq!(version, 1);
        assert!(scan.respect_gitignore);
        assert_eq!(output.format, "xml");
    }

    /// Table of size strings and the byte counts they must parse to.
    #[test]
    fn test_parse_size() {
        let cases: [(&str, Option<u64>); 7] = [
            ("100", Some(100)),
            ("100B", Some(100)),
            ("1KB", Some(1024)),
            ("1K", Some(1024)),
            ("10MB", Some(10 * 1024 * 1024)),
            ("1GB", Some(1024 * 1024 * 1024)),
            ("invalid", None),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(parse_size(input), *expected, "input: {:?}", input);
        }
    }

    /// The YAML starter config carries the output and scan sections.
    #[test]
    fn test_generate_default_yaml() {
        let rendered = Config::generate_default("yaml");

        for needle in &["output:", "scan:", "format:"] {
            assert!(rendered.contains(*needle));
        }
    }

    /// The TOML starter config carries the output and scan tables.
    #[test]
    fn test_generate_default_toml() {
        let rendered = Config::generate_default("toml");

        for needle in &["[output]", "[scan]"] {
            assert!(rendered.contains(*needle));
        }
    }

    /// The JSON starter config carries the output and scan keys.
    #[test]
    fn test_generate_default_json() {
        let rendered = Config::generate_default("json");

        for needle in &["\"output\"", "\"scan\""] {
            assert!(rendered.contains(*needle));
        }
    }

    /// Zero threads resolves to a positive automatic value; explicit counts pass through.
    #[test]
    fn test_effective_threads() {
        let mut config = Config::default();

        config.performance.threads = 0;
        let automatic = config.effective_threads();
        assert!(automatic > 0);

        config.performance.threads = 8;
        let explicit = config.effective_threads();
        assert_eq!(explicit, 8);
    }
}