use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
/// Top-level configuration, with one field per configuration section.
///
/// [`Config::load`] and [`Config::load_explicit`] deserialize this type from
/// the `config.toml` file inside the atlas directory. Because of the
/// container-level `#[serde(default)]`, a section that is absent from the
/// input is taken from `Config::default()`, i.e. from that section's own
/// `Default` implementation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(default)]
pub struct Config {
/// Settings read from the `scan` section.
pub scan: ScanConfig,
/// Settings read from the `extract` section.
pub extract: ExtractConfig,
/// Settings read from the `analyze` section.
pub analyze: AnalyzeConfig,
/// Settings read from the `render` section.
pub render: RenderConfig,
}
impl Config {
    /// Loads `config.toml` from `atlas_dir` on a best-effort basis.
    ///
    /// Any failure — the file is missing, it cannot be read, or it is not
    /// valid TOML for [`Config`] — is deliberately swallowed and the built-in
    /// defaults are returned instead. Use [`Config::load_explicit`] when the
    /// caller needs to know that the file was missing or malformed.
    ///
    /// # Errors
    ///
    /// The current implementation never returns `Err`; the `Result` return
    /// type is kept so the signature stays compatible with existing callers.
    pub fn load(atlas_dir: &Path) -> Result<Self> {
        let config_path = atlas_dir.join("config.toml");
        // Reading directly (without an `exists()` pre-check) avoids an extra
        // stat and the check-then-read race; a missing file simply surfaces
        // as a read error, which falls back to the defaults like every other
        // failure on this path.
        let loaded = fs::read_to_string(&config_path)
            .ok()
            .and_then(|text| toml::from_str::<Self>(&text).ok());
        Ok(loaded.unwrap_or_default())
    }

    /// Loads `config.toml` from `atlas_dir`, reporting every failure.
    ///
    /// # Errors
    ///
    /// Returns an error when the file cannot be read (including when it does
    /// not exist) or when its contents fail to deserialize into [`Config`].
    pub fn load_explicit(atlas_dir: &Path) -> Result<Self> {
        let raw = fs::read_to_string(atlas_dir.join("config.toml"))?;
        let config = toml::from_str(&raw)?;
        Ok(config)
    }
}
/// Settings for the `scan` section.
///
/// `#[serde(default)]` lets either field be omitted from the input; an
/// omitted field takes its value from `ScanConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ScanConfig {
/// Entries to ignore. The defaults mix directory names, file names and a
/// `*.pyc` wildcard entry; how entries are matched is decided by the code
/// that consumes this list, which is not in this file.
pub ignore: Vec<String>,
/// File extensions to include, written without a leading dot in the
/// defaults (see `DEFAULT_INCLUDE_EXTENSIONS`).
pub include_extensions: Vec<String>,
}
/// Extensions (without the leading dot) that `ScanConfig::default()` uses for
/// `include_extensions`.
pub const DEFAULT_INCLUDE_EXTENSIONS: &[&str] = &[
    // Documents and prose.
    "md",
    "txt",
    "pdf",
    "rst",
    "org",
    // Source code.
    "rs",
    "ts",
    "tsx",
    "js",
    "jsx",
    "mjs",
    "cjs",
    // Data and configuration.
    "json",
    "yml",
    "yaml",
    "toml",
    // Scripts and queries.
    "sh",
    "sql",
];
/// Returns an owned copy of `DEFAULT_INCLUDE_EXTENSIONS`, preserving order.
fn default_include_extensions() -> Vec<String> {
    DEFAULT_INCLUDE_EXTENSIONS
        .iter()
        .copied()
        .map(String::from)
        .collect()
}
impl Default for ScanConfig {
    /// Builds the default scan settings: a fixed ignore list plus the
    /// extensions returned by `default_include_extensions()`.
    fn default() -> Self {
        // Entries ignored out of the box.
        const DEFAULT_IGNORE: [&str; 6] = [
            ".git",
            ".atlas",
            "node_modules",
            "__pycache__",
            "*.pyc",
            ".DS_Store",
        ];

        let ignore = DEFAULT_IGNORE.iter().copied().map(String::from).collect();
        let include_extensions = default_include_extensions();
        Self {
            ignore,
            include_extensions,
        }
    }
}
/// Settings for the `extract` section.
///
/// `#[serde(default)]` lets any field be omitted from the input; an omitted
/// field takes its value from `ExtractConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ExtractConfig {
/// Upper bound on file size (default `10_000_000`).
/// NOTE(review): presumably measured in bytes — confirm against the extractor.
pub max_file_size: usize,
/// Length of an extracted snippet (default `400`).
/// NOTE(review): unit (bytes vs. characters) is not visible here — confirm.
pub snippet_length: usize,
/// Optional location of the `pdftotext` program; `None` by default.
/// NOTE(review): what happens when this is `None` is decided by the caller.
pub pdftotext_path: Option<String>,
}
impl Default for ExtractConfig {
    /// Builds the default extraction settings: a `10_000_000` size limit,
    /// `400`-long snippets and no explicit `pdftotext` path.
    fn default() -> Self {
        let max_file_size = 10_000_000;
        let snippet_length = 400;
        Self {
            max_file_size,
            snippet_length,
            pdftotext_path: None,
        }
    }
}
/// Settings for the `analyze` section.
///
/// `#[serde(default)]` lets any field be omitted from the input; an omitted
/// field takes its value from `AnalyzeConfig::default()`. The analysis code
/// that interprets these values is not in this file, so the per-field notes
/// below marked NOTE(review) are assumptions to confirm against it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct AnalyzeConfig {
/// How many top terms to keep (default `20`).
pub top_terms: usize,
/// How many top phrases to keep (default `10`).
pub top_phrases: usize,
/// Minimum term length (default `3`).
pub min_term_length: usize,
/// Maximum term length (default `25`).
pub max_term_length: usize,
/// Maximum digit ratio (default `0.4`).
/// NOTE(review): presumably the share of digit characters allowed in a term.
pub max_digit_ratio: f32,
/// Minimum `df` (default `2`).
/// NOTE(review): presumably a minimum document frequency, as a count.
pub min_df: usize,
/// Maximum `df` ratio (default `0.5`).
/// NOTE(review): presumably a maximum document frequency, as a fraction.
pub max_df_ratio: f32,
/// Additional stopwords supplied by the user; empty by default.
pub custom_stopwords: Vec<String>,
}
impl Default for AnalyzeConfig {
    /// Builds the default analysis settings; see the inline groups for the
    /// individual values.
    fn default() -> Self {
        // How many results to keep.
        let (top_terms, top_phrases) = (20, 10);
        // Bounds on term length.
        let (min_term_length, max_term_length) = (3, 25);

        Self {
            top_terms,
            top_phrases,
            min_term_length,
            max_term_length,
            // Ratio and frequency thresholds.
            max_digit_ratio: 0.4,
            min_df: 2,
            max_df_ratio: 0.5,
            // No user-supplied stopwords out of the box.
            custom_stopwords: Vec::new(),
        }
    }
}
/// Settings for the `render` section.
///
/// `#[serde(default)]` lets either field be omitted from the input; an
/// omitted field takes its value from `RenderConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct RenderConfig {
/// Folder depth used when rendering the atlas (default `3`).
/// NOTE(review): presumably the deepest folder level shown — confirm.
pub atlas_folder_depth: usize,
/// Maximum number of files per folder in the atlas (default `10`).
pub atlas_max_files_per_folder: usize,
}
impl Default for RenderConfig {
    /// Builds the default render settings: folder depth `3` and at most `10`
    /// files per folder.
    fn default() -> Self {
        let atlas_folder_depth = 3;
        let atlas_max_files_per_folder = 10;
        Self {
            atlas_folder_depth,
            atlas_max_files_per_folder,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::{Config, DEFAULT_INCLUDE_EXTENSIONS};
    use std::fs;
    use tempfile::tempdir;

    /// A directory without `config.toml` yields the built-in defaults.
    #[test]
    fn load_missing_returns_default() {
        let dir = tempdir().expect("tempdir should work");
        let config = Config::load(dir.path()).expect("load should succeed");
        let default = Config::default();
        assert_eq!(config.scan.ignore, default.scan.ignore);
        assert_eq!(
            config.extract.snippet_length,
            default.extract.snippet_length
        );
    }

    /// A syntactically broken file is ignored by the best-effort loader.
    #[test]
    fn load_malformed_returns_default() {
        let dir = tempdir().expect("tempdir should work");
        let config_path = dir.path().join("config.toml");
        fs::write(&config_path, "[scan]\nignore = [\"oops\"").expect("write should succeed");
        let config = Config::load(dir.path()).expect("load should succeed");
        let default = Config::default();
        // The broken `[scan]` section must not leak partially parsed values.
        assert_eq!(config.scan.ignore, default.scan.ignore);
        assert_eq!(config.analyze.top_terms, default.analyze.top_terms);
        assert_eq!(
            config.render.atlas_folder_depth,
            default.render.atlas_folder_depth
        );
    }

    /// Every value written to the file is picked up, and fields left out of a
    /// section keep their defaults.
    #[test]
    fn load_valid_overrides_fields() {
        let dir = tempdir().expect("tempdir should work");
        let config_path = dir.path().join("config.toml");
        let content = r#"
[scan]
ignore = ["target"]
include_extensions = ["js", "json"]
[extract]
max_file_size = 1234
snippet_length = 321
[analyze]
top_terms = 7
top_phrases = 4
min_term_length = 5
custom_stopwords = ["alpha", "beta"]
[render]
atlas_folder_depth = 2
atlas_max_files_per_folder = 3
"#;
        fs::write(&config_path, content).expect("write should succeed");
        let config = Config::load(dir.path()).expect("load should succeed");
        let default = Config::default();

        // [scan]
        assert_eq!(config.scan.ignore, vec!["target".to_string()]);
        assert_eq!(
            config.scan.include_extensions,
            vec!["js".to_string(), "json".to_string()]
        );

        // [extract]
        assert_eq!(config.extract.max_file_size, 1234);
        assert_eq!(config.extract.snippet_length, 321);
        assert!(config.extract.pdftotext_path.is_none());

        // [analyze]
        assert_eq!(config.analyze.top_terms, 7);
        assert_eq!(config.analyze.top_phrases, 4);
        assert_eq!(config.analyze.min_term_length, 5);
        assert_eq!(config.analyze.custom_stopwords, vec!["alpha", "beta"]);
        // Fields omitted from the section fall back to their defaults.
        assert_eq!(
            config.analyze.max_term_length,
            default.analyze.max_term_length
        );
        assert_eq!(config.analyze.min_df, default.analyze.min_df);

        // [render]
        assert_eq!(config.render.atlas_folder_depth, 2);
        assert_eq!(config.render.atlas_max_files_per_folder, 3);
    }

    /// The strict loader surfaces both a missing and a malformed file.
    #[test]
    fn load_explicit_reports_missing_or_invalid_config() {
        let dir = tempdir().expect("tempdir should work");
        let missing = Config::load_explicit(dir.path());
        assert!(missing.is_err());

        let config_path = dir.path().join("config.toml");
        fs::write(&config_path, "[scan]\ninclude_extensions = [\"md\"").expect("write should work");
        let malformed = Config::load_explicit(dir.path());
        assert!(malformed.is_err());
    }

    /// The default extension list mirrors the public constant and contains a
    /// few extensions a typical repository relies on.
    #[test]
    fn default_scan_extensions_cover_common_repo_files() {
        let config = Config::default();
        let expected: Vec<String> = DEFAULT_INCLUDE_EXTENSIONS
            .iter()
            .map(|ext| (*ext).to_string())
            .collect();
        assert_eq!(config.scan.include_extensions, expected);

        // Spot-check concrete entries so the test is not purely tautological.
        for ext in ["md", "rs", "json", "toml"] {
            assert!(
                config.scan.include_extensions.iter().any(|have| have == ext),
                "default extensions should include {ext}"
            );
        }
    }
}