Skip to main content

agent_atlas/config/
mod.rs

1//! Configuration types and defaults
2
3use anyhow::Result;
4use serde::{Deserialize, Serialize};
5use std::fs;
6use std::path::Path;
7
8/// Root configuration
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10#[serde(default)]
11pub struct Config {
12    pub scan: ScanConfig,
13    pub extract: ExtractConfig,
14    pub analyze: AnalyzeConfig,
15    pub render: RenderConfig,
16}
17
18impl Config {
19    pub fn load(atlas_dir: &Path) -> Result<Self> {
20        let config_path = atlas_dir.join("config.toml");
21        if !config_path.exists() {
22            return Ok(Self::default());
23        }
24
25        let content = match fs::read_to_string(&config_path) {
26            Ok(content) => content,
27            Err(_) => return Ok(Self::default()),
28        };
29
30        match toml::from_str(&content) {
31            Ok(config) => Ok(config),
32            Err(_) => Ok(Self::default()),
33        }
34    }
35
36    pub fn load_explicit(atlas_dir: &Path) -> Result<Self> {
37        let config_path = atlas_dir.join("config.toml");
38        let content = fs::read_to_string(&config_path)?;
39        Ok(toml::from_str(&content)?)
40    }
41}
42
43/// Scanning configuration
44#[derive(Debug, Clone, Serialize, Deserialize)]
45#[serde(default)]
46pub struct ScanConfig {
47    /// Patterns to ignore (in addition to .gitignore)
48    pub ignore: Vec<String>,
49    /// File extensions to index
50    pub include_extensions: Vec<String>,
51}
52
/// Built-in list of file extensions, grouped by kind.
// rustfmt is skipped so each group comment stays on the line above its group
// instead of being reflowed onto the end of the previous row.
#[rustfmt::skip]
pub const DEFAULT_INCLUDE_EXTENSIONS: &[&str] = &[
    // Prose
    "md", "txt", "pdf", "rst", "org",
    // Code
    "rs", "ts", "tsx", "js", "jsx", "mjs", "cjs",
    // Common config/text
    "json", "yml", "yaml", "toml", "sh", "sql",
];
59
60fn default_include_extensions() -> Vec<String> {
61    DEFAULT_INCLUDE_EXTENSIONS
62        .iter()
63        .map(|ext| (*ext).to_string())
64        .collect()
65}
66
67impl Default for ScanConfig {
68    fn default() -> Self {
69        Self {
70            ignore: vec![
71                ".git".to_string(),
72                ".atlas".to_string(),
73                "node_modules".to_string(),
74                "__pycache__".to_string(),
75                "*.pyc".to_string(),
76                ".DS_Store".to_string(),
77            ],
78            include_extensions: default_include_extensions(),
79        }
80    }
81}
82
83/// Text extraction configuration
84#[derive(Debug, Clone, Serialize, Deserialize)]
85#[serde(default)]
86pub struct ExtractConfig {
87    /// Max file size to process (bytes)
88    pub max_file_size: usize,
89    /// Snippet length (chars)
90    pub snippet_length: usize,
91    /// Path to pdftotext binary (auto-detected if not set)
92    pub pdftotext_path: Option<String>,
93}
94
95impl Default for ExtractConfig {
96    fn default() -> Self {
97        Self {
98            max_file_size: 10_000_000, // 10MB
99            snippet_length: 400,
100            pdftotext_path: None,
101        }
102    }
103}
104
105/// Analysis configuration
106#[derive(Debug, Clone, Serialize, Deserialize)]
107#[serde(default)]
108pub struct AnalyzeConfig {
109    /// Number of top terms per file
110    pub top_terms: usize,
111    /// Number of top phrases per file
112    pub top_phrases: usize,
113    /// Minimum term length
114    pub min_term_length: usize,
115    /// Maximum term length
116    pub max_term_length: usize,
117    /// Maximum digit ratio allowed in a term (0.0-1.0)
118    pub max_digit_ratio: f32,
119    /// Minimum document frequency for a term to be indexed
120    pub min_df: usize,
121    /// Maximum document frequency ratio (0.0-1.0)
122    pub max_df_ratio: f32,
123    /// Custom stopwords
124    pub custom_stopwords: Vec<String>,
125}
126
127impl Default for AnalyzeConfig {
128    fn default() -> Self {
129        Self {
130            top_terms: 20,
131            top_phrases: 10,
132            min_term_length: 3,
133            max_term_length: 25,
134            max_digit_ratio: 0.4,
135            min_df: 2,
136            max_df_ratio: 0.5,
137            custom_stopwords: vec![],
138        }
139    }
140}
141
142/// Rendering configuration
143#[derive(Debug, Clone, Serialize, Deserialize)]
144#[serde(default)]
145pub struct RenderConfig {
146    /// Folder depth in ROOT_ATLAS.md
147    pub atlas_folder_depth: usize,
148    /// Max files to list per folder in atlas
149    pub atlas_max_files_per_folder: usize,
150}
151
152impl Default for RenderConfig {
153    fn default() -> Self {
154        Self {
155            atlas_folder_depth: 3,
156            atlas_max_files_per_folder: 10,
157        }
158    }
159}
160
#[cfg(test)]
mod tests {
    use super::{Config, DEFAULT_INCLUDE_EXTENSIONS};
    use std::fs;
    use tempfile::tempdir;

    /// `load` on a directory without `config.toml` yields the defaults.
    #[test]
    fn load_missing_returns_default() {
        let dir = tempdir().expect("tempdir should work");
        let config = Config::load(dir.path()).expect("load should succeed");
        let default = Config::default();

        assert_eq!(config.scan.ignore, default.scan.ignore);
        assert_eq!(
            config.extract.snippet_length,
            default.extract.snippet_length
        );
    }

    /// `load` on an unparseable `config.toml` yields the defaults rather
    /// than an error or a partially applied file.
    #[test]
    fn load_malformed_returns_default() {
        let dir = tempdir().expect("tempdir should work");
        let config_path = dir.path().join("config.toml");
        // Unterminated array: the document as a whole fails to parse.
        fs::write(&config_path, "[scan]\nignore = [\"oops\"").expect("write should succeed");

        let config = Config::load(dir.path()).expect("load should succeed");
        let default = Config::default();

        // The value that appears in the broken file must not leak through.
        assert_eq!(config.scan.ignore, default.scan.ignore);
        assert_eq!(config.analyze.top_terms, default.analyze.top_terms);
        assert_eq!(
            config.render.atlas_folder_depth,
            default.render.atlas_folder_depth
        );
    }

    /// Every key written to a valid `config.toml` overrides its default,
    /// while keys left out keep their default value.
    #[test]
    fn load_valid_overrides_fields() {
        let dir = tempdir().expect("tempdir should work");
        let config_path = dir.path().join("config.toml");
        let content = r#"
[scan]
ignore = ["target"]
include_extensions = ["js", "json"]

[extract]
max_file_size = 1234
snippet_length = 321

[analyze]
top_terms = 7
top_phrases = 4
min_term_length = 5
custom_stopwords = ["alpha", "beta"]

[render]
atlas_folder_depth = 2
atlas_max_files_per_folder = 3
"#;
        fs::write(&config_path, content).expect("write should succeed");

        let config = Config::load(dir.path()).expect("load should succeed");

        // Every key present in the fixture is checked.
        assert_eq!(config.scan.ignore, vec!["target".to_string()]);
        assert_eq!(
            config.scan.include_extensions,
            vec!["js".to_string(), "json".to_string()]
        );
        assert_eq!(config.extract.max_file_size, 1234);
        assert_eq!(config.extract.snippet_length, 321);
        assert_eq!(config.analyze.top_terms, 7);
        assert_eq!(config.analyze.top_phrases, 4);
        assert_eq!(config.analyze.min_term_length, 5);
        assert_eq!(config.analyze.custom_stopwords, vec!["alpha", "beta"]);
        assert_eq!(config.render.atlas_folder_depth, 2);
        assert_eq!(config.render.atlas_max_files_per_folder, 3);

        // Keys omitted from the fixture fall back to the section defaults
        // (container-level `#[serde(default)]`).
        let default = Config::default();
        assert_eq!(
            config.extract.pdftotext_path,
            default.extract.pdftotext_path
        );
        assert_eq!(
            config.analyze.max_term_length,
            default.analyze.max_term_length
        );
        assert_eq!(config.analyze.min_df, default.analyze.min_df);
    }

    /// `load_explicit` surfaces both a missing file and a parse failure as
    /// errors instead of falling back to defaults.
    #[test]
    fn load_explicit_reports_missing_or_invalid_config() {
        let dir = tempdir().expect("tempdir should work");
        let missing = Config::load_explicit(dir.path());
        assert!(missing.is_err());

        let config_path = dir.path().join("config.toml");
        fs::write(&config_path, "[scan]\ninclude_extensions = [\"md\"").expect("write should work");

        let malformed = Config::load_explicit(dir.path());
        assert!(malformed.is_err());
    }

    /// The default scan extensions are exactly `DEFAULT_INCLUDE_EXTENSIONS`,
    /// in the same order.
    #[test]
    fn default_scan_extensions_cover_common_repo_files() {
        let config = Config::default();
        let expected: Vec<String> = DEFAULT_INCLUDE_EXTENSIONS
            .iter()
            .map(|ext| (*ext).to_string())
            .collect();

        assert_eq!(config.scan.include_extensions, expected);
    }
}