aurora_semantic/
config.rs

1//! Configuration types for the aurora-semantic engine.
2
3use serde::{Deserialize, Serialize};
4use std::path::PathBuf;
5
6use crate::types::Language;
7
8/// Main configuration for the semantic search engine.
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct EngineConfig {
11    /// Directory where indexes are stored.
12    pub index_dir: PathBuf,
13    /// Chunking configuration.
14    pub chunking: ChunkingConfig,
15    /// Embedding configuration.
16    pub embedding: EmbeddingConfig,
17    /// Search configuration.
18    pub search: SearchConfig,
19    /// Ignore patterns configuration.
20    pub ignore: IgnoreConfig,
21    /// Performance tuning.
22    pub performance: PerformanceConfig,
23}
24
25impl EngineConfig {
26    /// Create a new configuration with the given index directory.
27    pub fn new(index_dir: PathBuf) -> Self {
28        Self {
29            index_dir,
30            chunking: ChunkingConfig::default(),
31            embedding: EmbeddingConfig::default(),
32            search: SearchConfig::default(),
33            ignore: IgnoreConfig::default(),
34            performance: PerformanceConfig::default(),
35        }
36    }
37
38    /// Builder-style method to set chunking config.
39    pub fn with_chunking(mut self, config: ChunkingConfig) -> Self {
40        self.chunking = config;
41        self
42    }
43
44    /// Builder-style method to set embedding config.
45    pub fn with_embedding(mut self, config: EmbeddingConfig) -> Self {
46        self.embedding = config;
47        self
48    }
49
50    /// Builder-style method to set search config.
51    pub fn with_search(mut self, config: SearchConfig) -> Self {
52        self.search = config;
53        self
54    }
55
56    /// Builder-style method to set ignore config.
57    pub fn with_ignore(mut self, config: IgnoreConfig) -> Self {
58        self.ignore = config;
59        self
60    }
61
62    /// Builder-style method to set performance config.
63    pub fn with_performance(mut self, config: PerformanceConfig) -> Self {
64        self.performance = config;
65        self
66    }
67}
68
69impl Default for EngineConfig {
70    fn default() -> Self {
71        Self::new(PathBuf::from(".aurora"))
72    }
73}
74
75/// Configuration for code chunking.
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct ChunkingConfig {
78    /// Maximum chunk size in characters.
79    pub max_chunk_size: usize,
80    /// Minimum chunk size in characters.
81    pub min_chunk_size: usize,
82    /// Whether to extract documentation comments.
83    pub extract_comments: bool,
84}
85
86impl Default for ChunkingConfig {
87    fn default() -> Self {
88        Self {
89            max_chunk_size: 2000,
90            min_chunk_size: 50,
91            extract_comments: true,
92        }
93    }
94}
95
96/// Configuration for embedding generation.
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct EmbeddingConfig {
99    /// Embedding dimension.
100    pub dimension: usize,
101    /// Batch size for embedding generation.
102    pub batch_size: usize,
103    /// Maximum sequence length.
104    pub max_length: usize,
105    /// Whether to normalize embeddings.
106    pub normalize: bool,
107}
108
109impl Default for EmbeddingConfig {
110    fn default() -> Self {
111        Self {
112            dimension: 768, // Common for code models like jina-code
113            batch_size: 32,
114            max_length: 512,
115            normalize: true,
116        }
117    }
118}
119
120/// Configuration for search behavior.
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct SearchConfig {
123    /// Default number of results to return.
124    pub default_limit: usize,
125    /// Maximum number of results to return.
126    pub max_limit: usize,
127    /// Default search mode.
128    pub default_mode: SearchMode,
129    /// Weight for lexical results in hybrid search (0.0 to 1.0).
130    pub lexical_weight: f32,
131    /// Weight for semantic results in hybrid search (0.0 to 1.0).
132    pub semantic_weight: f32,
133    /// Minimum score threshold for results (0.0 to 1.0).
134    pub min_score: f32,
135    /// Enable fuzzy matching in lexical search.
136    pub fuzzy_matching: bool,
137    /// Fuzzy matching distance (edit distance).
138    pub fuzzy_distance: u8,
139}
140
141impl Default for SearchConfig {
142    fn default() -> Self {
143        Self {
144            default_limit: 20,
145            max_limit: 100,
146            default_mode: SearchMode::Hybrid,
147            lexical_weight: 0.4,
148            semantic_weight: 0.6,
149            min_score: 0.1,
150            fuzzy_matching: true,
151            fuzzy_distance: 2,
152        }
153    }
154}
155
156/// Search mode selection.
157#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
158pub enum SearchMode {
159    /// Keyword-based lexical search only.
160    Lexical,
161    /// Embedding-based semantic search only.
162    Semantic,
163    /// Combined lexical and semantic search.
164    Hybrid,
165}
166
167impl Default for SearchMode {
168    fn default() -> Self {
169        Self::Hybrid
170    }
171}
172
173/// Configuration for ignore patterns.
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub struct IgnoreConfig {
176    /// Respect .gitignore files.
177    pub use_gitignore: bool,
178    /// Respect .ignore files.
179    pub use_ignore_files: bool,
180    /// Additional patterns to ignore.
181    pub patterns: Vec<String>,
182    /// File extensions to ignore.
183    pub ignored_extensions: Vec<String>,
184    /// Directories to always ignore.
185    pub ignored_directories: Vec<String>,
186    /// Maximum file size to index (in bytes).
187    pub max_file_size: u64,
188}
189
190impl Default for IgnoreConfig {
191    fn default() -> Self {
192        Self {
193            use_gitignore: true,
194            use_ignore_files: true,
195            patterns: vec![],
196            ignored_extensions: vec![
197                "exe".into(), "dll".into(), "so".into(), "dylib".into(),
198                "bin".into(), "obj".into(), "o".into(), "a".into(), "lib".into(),
199                "png".into(), "jpg".into(), "jpeg".into(), "gif".into(), "ico".into(),
200                "svg".into(), "woff".into(), "woff2".into(), "ttf".into(), "eot".into(),
201                "mp3".into(), "mp4".into(), "avi".into(), "mov".into(),
202                "zip".into(), "tar".into(), "gz".into(), "rar".into(), "7z".into(),
203                "pdf".into(), "doc".into(), "docx".into(),
204            ],
205            ignored_directories: vec![
206                "node_modules".into(), ".git".into(), ".svn".into(), ".hg".into(),
207                "target".into(), "build".into(), "dist".into(), "out".into(),
208                ".next".into(), ".nuxt".into(), "__pycache__".into(),
209                ".pytest_cache".into(), ".mypy_cache".into(),
210                "venv".into(), ".venv".into(), "vendor".into(),
211                ".idea".into(), ".vscode".into(), ".vs".into(),
212            ],
213            max_file_size: 1024 * 1024, // 1MB
214        }
215    }
216}
217
218/// Performance tuning configuration.
219#[derive(Debug, Clone, Serialize, Deserialize)]
220pub struct PerformanceConfig {
221    /// Number of threads for parallel processing.
222    pub num_threads: usize,
223    /// Memory limit for indexing (in bytes).
224    pub memory_limit: usize,
225    /// Enable incremental indexing.
226    pub incremental: bool,
227}
228
229impl Default for PerformanceConfig {
230    fn default() -> Self {
231        Self {
232            num_threads: std::thread::available_parallelism()
233                .map(|p| p.get())
234                .unwrap_or(4),
235            memory_limit: 512 * 1024 * 1024, // 512MB
236            incremental: true,
237        }
238    }
239}
240
241/// Configuration for a specific workspace.
242#[derive(Debug, Clone, Serialize, Deserialize)]
243pub struct WorkspaceConfig {
244    /// Root path of the workspace.
245    pub root_path: PathBuf,
246    /// Workspace-specific ignore patterns (in addition to global).
247    pub additional_ignores: Vec<String>,
248    /// Languages to index (None = all).
249    pub languages: Option<Vec<Language>>,
250    /// Watch for file changes.
251    pub watch_changes: bool,
252}
253
254impl WorkspaceConfig {
255    /// Create a new workspace configuration.
256    pub fn new(root_path: PathBuf) -> Self {
257        Self {
258            root_path,
259            additional_ignores: vec![],
260            languages: None,
261            watch_changes: false,
262        }
263    }
264
265    /// Builder-style method to add ignore patterns.
266    pub fn with_ignores(mut self, patterns: Vec<String>) -> Self {
267        self.additional_ignores = patterns;
268        self
269    }
270
271    /// Builder-style method to set languages.
272    pub fn with_languages(mut self, languages: Vec<Language>) -> Self {
273        self.languages = Some(languages);
274        self
275    }
276
277    /// Builder-style method to enable file watching.
278    pub fn with_watch(mut self) -> Self {
279        self.watch_changes = true;
280        self
281    }
282}