1use anyhow::Result;
4use serde::{Deserialize, Serialize};
5use std::fs;
6use std::path::Path;
7
8#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10#[serde(default)]
11pub struct Config {
12 pub scan: ScanConfig,
13 pub extract: ExtractConfig,
14 pub analyze: AnalyzeConfig,
15 pub render: RenderConfig,
16}
17
18impl Config {
19 pub fn load(atlas_dir: &Path) -> Result<Self> {
20 let config_path = atlas_dir.join("config.toml");
21 if !config_path.exists() {
22 return Ok(Self::default());
23 }
24
25 let content = match fs::read_to_string(&config_path) {
26 Ok(content) => content,
27 Err(_) => return Ok(Self::default()),
28 };
29
30 match toml::from_str(&content) {
31 Ok(config) => Ok(config),
32 Err(_) => Ok(Self::default()),
33 }
34 }
35
36 pub fn load_explicit(atlas_dir: &Path) -> Result<Self> {
37 let config_path = atlas_dir.join("config.toml");
38 let content = fs::read_to_string(&config_path)?;
39 Ok(toml::from_str(&content)?)
40 }
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
45#[serde(default)]
46pub struct ScanConfig {
47 pub ignore: Vec<String>,
49 pub include_extensions: Vec<String>,
51}
52
/// Extensions (without a leading dot) used as the default value of
/// `ScanConfig::include_extensions`. Order is significant to callers that
/// compare against this list.
pub const DEFAULT_INCLUDE_EXTENSIONS: &[&str] = &[
    "md", "txt", "pdf", "rst", "org", "rs", "ts", "tsx", "js", "jsx", "mjs", "cjs", "json", "yml",
    "yaml", "toml", "sh", "sql",
];

/// Returns an owned copy of [`DEFAULT_INCLUDE_EXTENSIONS`], in the same order.
fn default_include_extensions() -> Vec<String> {
    DEFAULT_INCLUDE_EXTENSIONS
        .iter()
        .copied()
        .map(String::from)
        .collect()
}
66
67impl Default for ScanConfig {
68 fn default() -> Self {
69 Self {
70 ignore: vec![
71 ".git".to_string(),
72 ".atlas".to_string(),
73 "node_modules".to_string(),
74 "__pycache__".to_string(),
75 "*.pyc".to_string(),
76 ".DS_Store".to_string(),
77 ],
78 include_extensions: default_include_extensions(),
79 }
80 }
81}
82
83#[derive(Debug, Clone, Serialize, Deserialize)]
85#[serde(default)]
86pub struct ExtractConfig {
87 pub max_file_size: usize,
89 pub snippet_length: usize,
91 pub pdftotext_path: Option<String>,
93}
94
95impl Default for ExtractConfig {
96 fn default() -> Self {
97 Self {
98 max_file_size: 10_000_000, snippet_length: 400,
100 pdftotext_path: None,
101 }
102 }
103}
104
105#[derive(Debug, Clone, Serialize, Deserialize)]
107#[serde(default)]
108pub struct AnalyzeConfig {
109 pub top_terms: usize,
111 pub top_phrases: usize,
113 pub min_term_length: usize,
115 pub max_term_length: usize,
117 pub max_digit_ratio: f32,
119 pub min_df: usize,
121 pub max_df_ratio: f32,
123 pub custom_stopwords: Vec<String>,
125}
126
127impl Default for AnalyzeConfig {
128 fn default() -> Self {
129 Self {
130 top_terms: 20,
131 top_phrases: 10,
132 min_term_length: 3,
133 max_term_length: 25,
134 max_digit_ratio: 0.4,
135 min_df: 2,
136 max_df_ratio: 0.5,
137 custom_stopwords: vec![],
138 }
139 }
140}
141
142#[derive(Debug, Clone, Serialize, Deserialize)]
144#[serde(default)]
145pub struct RenderConfig {
146 pub atlas_folder_depth: usize,
148 pub atlas_max_files_per_folder: usize,
150}
151
152impl Default for RenderConfig {
153 fn default() -> Self {
154 Self {
155 atlas_folder_depth: 3,
156 atlas_max_files_per_folder: 10,
157 }
158 }
159}
160
#[cfg(test)]
mod tests {
    use super::{Config, DEFAULT_INCLUDE_EXTENSIONS};
    use std::fs;
    use tempfile::tempdir;

    /// `Config::load` yields the defaults when the directory has no `config.toml`.
    #[test]
    fn load_missing_returns_default() {
        let dir = tempdir().expect("tempdir should work");
        let config = Config::load(dir.path()).expect("load should succeed");
        let default = Config::default();

        assert_eq!(config.scan.ignore, default.scan.ignore);
        assert_eq!(
            config.extract.snippet_length,
            default.extract.snippet_length
        );
    }

    /// `Config::load` swallows parse errors and yields the defaults, including
    /// for the section that the malformed input tried to set.
    #[test]
    fn load_malformed_returns_default() {
        let dir = tempdir().expect("tempdir should work");
        let config_path = dir.path().join("config.toml");
        // The array is never closed, so the document is not valid TOML.
        fs::write(&config_path, "[scan]\nignore = [\"oops\"").expect("write should succeed");

        let config = Config::load(dir.path()).expect("load should succeed");
        let default = Config::default();

        // Nothing from the broken `[scan]` table may leak into the result.
        assert_eq!(config.scan.ignore, default.scan.ignore);
        assert_eq!(config.analyze.top_terms, default.analyze.top_terms);
        assert_eq!(
            config.render.atlas_folder_depth,
            default.render.atlas_folder_depth
        );
    }

    /// A valid file overrides exactly the keys it sets; omitted keys keep their
    /// defaults.
    #[test]
    fn load_valid_overrides_fields() {
        let dir = tempdir().expect("tempdir should work");
        let config_path = dir.path().join("config.toml");
        let content = r#"
[scan]
ignore = ["target"]
include_extensions = ["js", "json"]

[extract]
max_file_size = 1234
snippet_length = 321

[analyze]
top_terms = 7
top_phrases = 4
min_term_length = 5
custom_stopwords = ["alpha", "beta"]

[render]
atlas_folder_depth = 2
atlas_max_files_per_folder = 3
"#;
        fs::write(&config_path, content).expect("write should succeed");

        let config = Config::load(dir.path()).expect("load should succeed");

        // Every key written above must be reflected in the loaded config.
        assert_eq!(config.scan.ignore, vec!["target".to_string()]);
        assert_eq!(
            config.scan.include_extensions,
            vec!["js".to_string(), "json".to_string()]
        );
        assert_eq!(config.extract.max_file_size, 1234);
        assert_eq!(config.extract.snippet_length, 321);
        assert_eq!(config.analyze.top_terms, 7);
        assert_eq!(config.analyze.top_phrases, 4);
        assert_eq!(config.analyze.min_term_length, 5);
        assert_eq!(config.analyze.custom_stopwords, vec!["alpha", "beta"]);
        assert_eq!(config.render.atlas_folder_depth, 2);
        assert_eq!(config.render.atlas_max_files_per_folder, 3);

        // Keys the file leaves out fall back to the per-section defaults.
        let default = Config::default();
        assert_eq!(config.extract.pdftotext_path, default.extract.pdftotext_path);
        assert_eq!(
            config.analyze.max_term_length,
            default.analyze.max_term_length
        );
        assert_eq!(config.analyze.min_df, default.analyze.min_df);
    }

    /// `Config::load_explicit` errors on both a missing and a malformed file.
    #[test]
    fn load_explicit_reports_missing_or_invalid_config() {
        let dir = tempdir().expect("tempdir should work");
        let missing = Config::load_explicit(dir.path());
        assert!(missing.is_err());

        let config_path = dir.path().join("config.toml");
        // Unterminated array: present on disk but not parseable.
        fs::write(&config_path, "[scan]\ninclude_extensions = [\"md\"").expect("write should work");

        let malformed = Config::load_explicit(dir.path());
        assert!(malformed.is_err());
    }

    /// The default scan extensions are exactly `DEFAULT_INCLUDE_EXTENSIONS`, in order.
    #[test]
    fn default_scan_extensions_cover_common_repo_files() {
        let config = Config::default();
        let expected: Vec<String> = DEFAULT_INCLUDE_EXTENSIONS
            .iter()
            .map(|ext| (*ext).to_string())
            .collect();

        assert_eq!(config.scan.include_extensions, expected);
    }
}