cadi_core/atomizer/
config.rs

//! Atomizer configuration
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6/// Configuration for the atomizer
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct AtomizerConfig {
9    /// Minimum lines for a function to be its own atom
10    #[serde(default = "default_min_function_lines")]
11    pub min_function_lines: usize,
12
13    /// Minimum lines for a file to be split into atoms
14    #[serde(default = "default_min_file_lines")]
15    pub min_file_lines_to_split: usize,
16
17    /// Maximum lines per atom before forcing a split
18    #[serde(default = "default_max_atom_lines")]
19    pub max_atom_lines: usize,
20
21    /// Whether to extract doc comments as part of the atom
22    #[serde(default = "default_true")]
23    pub include_doc_comments: bool,
24
25    /// Whether to include type definitions referenced by functions
26    #[serde(default = "default_true")]
27    pub include_type_context: bool,
28
29    /// Depth of dependency resolution
30    #[serde(default = "default_resolution_depth")]
31    pub resolution_depth: usize,
32
33    /// Language-specific configurations
34    #[serde(default)]
35    pub languages: HashMap<String, LanguageConfig>,
36
37    /// Namespace for generated aliases
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub namespace: Option<String>,
40}
41
/// Serde fallback for `AtomizerConfig::min_function_lines`.
fn default_min_function_lines() -> usize {
    5
}
/// Serde fallback for `AtomizerConfig::min_file_lines_to_split`.
fn default_min_file_lines() -> usize {
    30
}
/// Serde fallback for `AtomizerConfig::max_atom_lines`.
fn default_max_atom_lines() -> usize {
    300
}
/// Serde fallback for `AtomizerConfig::resolution_depth`.
fn default_resolution_depth() -> usize {
    2
}
/// Serde fallback for boolean fields that should be switched on when omitted.
fn default_true() -> bool {
    true
}
47
48impl Default for AtomizerConfig {
49    fn default() -> Self {
50        let mut languages = HashMap::new();
51        // Common web & systems languages enabled by default
52        languages.insert("tsx".to_string(), LanguageConfig { extensions: vec!["tsx".to_string()], enabled: true, ..Default::default() });
53        languages.insert("jsx".to_string(), LanguageConfig { extensions: vec!["jsx".to_string()], enabled: true, ..Default::default() });
54        languages.insert("html".to_string(), LanguageConfig { extensions: vec!["html".to_string(), "htm".to_string()], enabled: true, ..Default::default() });
55        languages.insert("css".to_string(), LanguageConfig { extensions: vec!["css".to_string()], enabled: true, ..Default::default() });
56        languages.insert("c".to_string(), LanguageConfig { extensions: vec!["c".to_string()], enabled: true, ..Default::default() });
57        languages.insert("cpp".to_string(), LanguageConfig { extensions: vec!["cpp".to_string(), "cc".to_string(), "cxx".to_string()], enabled: true, ..Default::default() });
58        languages.insert("csharp".to_string(), LanguageConfig { extensions: vec!["cs".to_string()], enabled: true, ..Default::default() });
59        languages.insert("glsl".to_string(), LanguageConfig { extensions: vec!["glsl".to_string()], enabled: true, ..Default::default() });
60        languages.insert("wgsl".to_string(), LanguageConfig { extensions: vec!["wgsl".to_string()], enabled: true, ..Default::default() });
61
62        Self {
63            min_function_lines: default_min_function_lines(),
64            min_file_lines_to_split: default_min_file_lines(),
65            max_atom_lines: default_max_atom_lines(),
66            include_doc_comments: true,
67            include_type_context: true,
68            resolution_depth: default_resolution_depth(),
69            languages,
70            namespace: None,
71        }
72    }
73}
74
75impl AtomizerConfig {
76    /// Create a config optimized for minimal atoms
77    pub fn minimal() -> Self {
78        Self {
79            min_function_lines: 1,
80            min_file_lines_to_split: 10,
81            max_atom_lines: 100,
82            ..Default::default()
83        }
84    }
85
86    /// Create a config for coarse-grained atoms
87    pub fn coarse() -> Self {
88        Self {
89            min_function_lines: 20,
90            min_file_lines_to_split: 100,
91            max_atom_lines: 500,
92            ..Default::default()
93        }
94    }
95
96    /// Set the namespace
97    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
98        self.namespace = Some(namespace.into());
99        self
100    }
101}
102
103/// Language-specific atomizer configuration
104#[derive(Debug, Clone, Default, Serialize, Deserialize)]
105pub struct LanguageConfig {
106    /// File extensions for this language
107    #[serde(default)]
108    pub extensions: Vec<String>,
109
110    /// Whether to treat this language specially
111    #[serde(default)]
112    pub enabled: bool,
113
114    /// Custom atom boundaries (e.g., specific decorators in Python)
115    #[serde(default)]
116    pub custom_boundaries: Vec<String>,
117
118    /// Symbols to always include in context
119    #[serde(default)]
120    pub always_include: Vec<String>,
121
122    /// Patterns to ignore
123    #[serde(default)]
124    pub ignore_patterns: Vec<String>,
125}
126
127impl LanguageConfig {
128    pub fn rust() -> Self {
129        Self {
130            extensions: vec!["rs".to_string()],
131            enabled: true,
132            custom_boundaries: vec![
133                "#[test]".to_string(),
134                "#[cfg(test)]".to_string(),
135            ],
136            always_include: vec![],
137            ignore_patterns: vec![],
138        }
139    }
140
141    pub fn typescript() -> Self {
142        Self {
143            extensions: vec![
144                "ts".to_string(),
145                "tsx".to_string(),
146                "js".to_string(),
147                "jsx".to_string(),
148            ],
149            enabled: true,
150            custom_boundaries: vec![
151                "export default".to_string(),
152                "export function".to_string(),
153                "export class".to_string(),
154            ],
155            always_include: vec![],
156            ignore_patterns: vec![],
157        }
158    }
159
160    pub fn python() -> Self {
161        Self {
162            extensions: vec!["py".to_string(), "pyi".to_string()],
163            enabled: true,
164            custom_boundaries: vec![
165                "def ".to_string(),
166                "class ".to_string(),
167                "@".to_string(), // decorators
168            ],
169            always_include: vec![],
170            ignore_patterns: vec!["__pycache__".to_string()],
171        }
172    }
173
174    pub fn tsx() -> Self {
175        Self {
176            extensions: vec!["tsx".to_string()],
177            enabled: true,
178            ..Default::default()
179        }
180    }
181
182    pub fn jsx() -> Self {
183        Self {
184            extensions: vec!["jsx".to_string()],
185            enabled: true,
186            ..Default::default()
187        }
188    }
189
190    pub fn html() -> Self {
191        Self {
192            extensions: vec!["html".to_string(), "htm".to_string()],
193            enabled: true,
194            ..Default::default()
195        }
196    }
197
198    pub fn css() -> Self {
199        Self {
200            extensions: vec!["css".to_string()],
201            enabled: true,
202            ..Default::default()
203        }
204    }
205
206    pub fn c() -> Self {
207        Self {
208            extensions: vec!["c".to_string()],
209            enabled: true,
210            ..Default::default()
211        }
212    }
213
214    pub fn cpp() -> Self {
215        Self {
216            extensions: vec!["cpp".to_string(), "cc".to_string(), "cxx".to_string()],
217            enabled: true,
218            ..Default::default()
219        }
220    }
221
222    pub fn csharp() -> Self {
223        Self {
224            extensions: vec!["cs".to_string()],
225            enabled: true,
226            ..Default::default()
227        }
228    }
229
230    pub fn glsl() -> Self {
231        Self {
232            extensions: vec!["glsl".to_string()],
233            enabled: true,
234            ..Default::default()
235        }
236    }
237
238    pub fn wgsl() -> Self {
239        Self {
240            extensions: vec!["wgsl".to_string()],
241            enabled: true,
242            ..Default::default()
243        }
244    }
245}