Skip to main content

codesize/
config.rs

1use serde::Deserialize;
2use std::collections::{HashMap, HashSet};
3use std::path::PathBuf;
4
5#[derive(Debug, Clone, Deserialize)]
6pub struct LangLimits {
7    pub file: usize,
8    pub function: usize,
9}
10
11#[derive(Debug, Clone, Deserialize, Default)]
12struct ScanOverrides {
13    /// Respect .gitignore / .ignore / global git excludes (default: false).
14    respect_gitignore: Option<bool>,
15    /// Additional gitignore-style filenames to look for in every directory
16    /// (e.g. [".npmignore", ".dockerignore"]).  Applied regardless of
17    /// `respect_gitignore`.
18    respect_ignore_files: Option<Vec<String>>,
19    /// Paths to explicit gitignore-pattern files to apply during the walk
20    /// (e.g. ["~/.globalignore"]).  Applied regardless of `respect_gitignore`.
21    ignore_files: Option<Vec<String>>,
22    /// Default CSV output path used when --output is not passed on the CLI.
23    default_output_file: Option<String>,
24    /// Replaces the built-in skip-directory list when present.
25    skip_dirs: Option<Vec<String>>,
26    /// Replaces the built-in skip-suffix list when present.
27    skip_suffixes: Option<Vec<String>>,
28}
29
30#[derive(Debug, Clone, Deserialize, Default)]
31struct FileConfig {
32    #[serde(default)]
33    limits: HashMap<String, LangLimits>,
34    #[serde(default)]
35    scan: ScanOverrides,
36    /// Maps file extensions (with leading dot, e.g. `".rb"`) to language names.
37    /// Any language name is valid; if no tree-sitter grammar exists for it,
38    /// only the file-length limit is enforced (no function analysis).
39    #[serde(default)]
40    languages: HashMap<String, String>,
41}
42
43pub struct Config {
44    pub limits: HashMap<String, LangLimits>,
45    /// User-defined extension → language name mappings (extension includes leading dot,
46    /// lowercased). Consulted after the built-in extension table.
47    pub extra_languages: HashMap<String, String>,
48    pub skip_dirs: HashSet<String>,
49    pub skip_suffixes: HashSet<String>,
50    pub respect_gitignore: bool,
51    pub respect_ignore_files: Vec<String>,
52    pub ignore_files: Vec<String>,
53    pub default_output_file: String,
54}
55
56fn default_limits() -> HashMap<String, LangLimits> {
57    [
58        ("Rust", 500, 80),
59        ("TypeScript", 300, 40),
60        ("JavaScript", 300, 40),
61        ("Python", 300, 30),
62        ("Go", 400, 60),
63        ("Java", 300, 30),
64        ("C", 500, 60),
65        ("C++", 400, 60),
66        ("Swift", 400, 50),
67        ("Lua", 400, 50),
68    ]
69    .into_iter()
70    .map(|(lang, file, function)| (lang.to_string(), LangLimits { file, function }))
71    .collect()
72}
73
/// Returns the built-in set of directory names that are skipped when the
/// config file does not provide its own `skip_dirs` list.
fn default_skip_dirs() -> HashSet<String> {
    const NAMES: [&str; 6] = [".git", ".venv", "node_modules", "target", "dist", "build"];
    HashSet::from(NAMES.map(String::from))
}
80
/// Returns the built-in set of file-name suffixes that are skipped when the
/// config file does not provide its own `skip_suffixes` list.
fn default_skip_suffixes() -> HashSet<String> {
    const SUFFIXES: &[&str] = &[
        ".d.ts", ".min.js", ".min.ts", ".min.mjs", "_pb2.py", "_pb.go", ".pb.go",
    ];
    SUFFIXES.iter().copied().map(String::from).collect()
}
89
90/// Returns the config file path, preferring $XDG_CONFIG_HOME over ~/.config.
91fn config_path() -> Option<PathBuf> {
92    let base = std::env::var_os("XDG_CONFIG_HOME")
93        .map(PathBuf::from)
94        .or_else(|| dirs::home_dir().map(|h| h.join(".config")))?;
95    Some(base.join("codesize").join("config.toml"))
96}
97
98/// Loads configuration from the XDG config file, merging with built-in defaults.
99///
100/// - Individual language limits are overridden per-entry; others keep defaults.
101/// - `skip_dirs` / `skip_suffixes` replace the defaults when present.
102/// - `respect_ignore_files` / `ignore_files` extend the walk with additional
103///   gitignore-style rules and are empty by default.
104pub fn load_config() -> Config {
105    let file_cfg: FileConfig = config_path()
106        .and_then(|p| std::fs::read_to_string(p).ok())
107        .and_then(|s| toml::from_str(&s).ok())
108        .unwrap_or_default();
109
110    let mut limits = default_limits();
111    for (lang, overrides) in file_cfg.limits {
112        limits.insert(lang, overrides);
113    }
114
115    let skip_dirs = file_cfg
116        .scan
117        .skip_dirs
118        .map(|v| v.into_iter().collect())
119        .unwrap_or_else(default_skip_dirs);
120
121    let skip_suffixes = file_cfg
122        .scan
123        .skip_suffixes
124        .map(|v| v.into_iter().collect())
125        .unwrap_or_else(default_skip_suffixes);
126
127    // Normalize user-supplied extension keys to lowercase with a leading dot.
128    let extra_languages = file_cfg
129        .languages
130        .into_iter()
131        .map(|(ext, lang)| {
132            let ext = ext.trim().to_lowercase();
133            let ext = if ext.starts_with('.') {
134                ext
135            } else {
136                format!(".{ext}")
137            };
138            (ext, lang)
139        })
140        .collect();
141
142    Config {
143        limits,
144        extra_languages,
145        skip_dirs,
146        skip_suffixes,
147        respect_gitignore: file_cfg.scan.respect_gitignore.unwrap_or(false),
148        respect_ignore_files: file_cfg.scan.respect_ignore_files.unwrap_or_default(),
149        ignore_files: file_cfg.scan.ignore_files.unwrap_or_default(),
150        default_output_file: file_cfg
151            .scan
152            .default_output_file
153            .unwrap_or_else(|| "codesize.csv".to_string()),
154    }
155}
156
157#[cfg(test)]
158mod tests {
159    use super::*;
160
161    #[test]
162    fn default_config_has_all_languages() {
163        let cfg = load_config();
164        for lang in &[
165            "Rust",
166            "Python",
167            "Go",
168            "TypeScript",
169            "JavaScript",
170            "Java",
171            "C",
172            "C++",
173            "Swift",
174            "Lua",
175        ] {
176            assert!(cfg.limits.contains_key(*lang), "Missing limits for {lang}");
177        }
178    }
179
180    #[test]
181    fn default_config_has_skip_dirs() {
182        let cfg = load_config();
183        assert!(cfg.skip_dirs.contains(".git"));
184        assert!(cfg.skip_dirs.contains("node_modules"));
185    }
186
187    #[test]
188    fn default_config_has_skip_suffixes() {
189        let cfg = load_config();
190        assert!(cfg.skip_suffixes.contains(".d.ts"));
191        assert!(cfg.skip_suffixes.contains(".min.js"));
192    }
193
194    #[test]
195    fn default_config_gitignore_off() {
196        let cfg = load_config();
197        assert!(!cfg.respect_gitignore);
198        assert!(cfg.respect_ignore_files.is_empty());
199        assert!(cfg.ignore_files.is_empty());
200    }
201
202    #[test]
203    fn default_output_file_is_csv() {
204        let cfg = load_config();
205        assert_eq!(cfg.default_output_file, "codesize.csv");
206    }
207
208    #[test]
209    fn toml_override_replaces_single_limit() {
210        let toml = r#"
211[limits.Rust]
212file = 999
213function = 99
214"#;
215        let file_cfg: FileConfig = toml::from_str(toml).unwrap();
216        let mut limits = default_limits();
217        for (lang, ov) in file_cfg.limits {
218            limits.insert(lang, ov);
219        }
220        let rust = &limits["Rust"];
221        assert_eq!(rust.file, 999);
222        assert_eq!(rust.function, 99);
223        assert_eq!(limits["Python"].file, 300);
224    }
225}