Skip to main content

directory_indexer/config/
settings.rs

1use serde::{Deserialize, Serialize};
2use std::path::PathBuf;
3
4use crate::error::Result;
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct Config {
8    pub storage: StorageConfig,
9    pub embedding: EmbeddingConfig,
10    pub indexing: IndexingConfig,
11    pub monitoring: MonitoringConfig,
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct StorageConfig {
16    pub sqlite_path: PathBuf,
17    pub qdrant: QdrantConfig,
18}
19
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct QdrantConfig {
22    pub endpoint: String,
23    pub collection: String,
24    pub api_key: Option<String>,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct EmbeddingConfig {
29    pub provider: String,
30    pub model: String,
31    pub endpoint: String,
32    pub api_key: Option<String>,
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct IndexingConfig {
37    pub chunk_size: usize,
38    pub overlap: usize,
39    pub max_file_size: u64,
40    pub ignore_patterns: Vec<String>,
41    pub concurrency: usize,
42}
43
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct MonitoringConfig {
46    pub file_watching: bool,
47    pub batch_size: usize,
48}
49
50impl Default for Config {
51    fn default() -> Self {
52        let app_dir = Self::default_app_dir();
53
54        Self {
55            storage: StorageConfig {
56                sqlite_path: app_dir.join("data.db"),
57                qdrant: QdrantConfig {
58                    endpoint: "http://localhost:6333".to_string(),
59                    collection: if std::env::var("CARGO_PKG_NAME").is_ok()
60                        && std::env::var("CARGO_MANIFEST_DIR").is_ok()
61                    {
62                        // We're running under cargo (likely tests or development)
63                        "directory-indexer-test".to_string()
64                    } else {
65                        "directory-indexer".to_string()
66                    },
67                    api_key: None,
68                },
69            },
70            embedding: EmbeddingConfig {
71                provider: "ollama".to_string(),
72                model: "nomic-embed-text".to_string(),
73                endpoint: "http://localhost:11434".to_string(),
74                api_key: None,
75            },
76            indexing: IndexingConfig {
77                chunk_size: 512,
78                overlap: 50,
79                max_file_size: 10 * 1024 * 1024, // 10MB
80                ignore_patterns: vec![
81                    ".git".to_string(),
82                    "node_modules".to_string(),
83                    "target".to_string(),
84                ],
85                concurrency: 4,
86            },
87            monitoring: MonitoringConfig {
88                file_watching: false,
89                batch_size: 100,
90            },
91        }
92    }
93}
94
95impl Config {
96    pub fn load() -> Result<Self> {
97        // Start with defaults
98        let mut config = Self::default();
99
100        // Ensure the app directory exists
101        config.ensure_app_dir_exists()?;
102
103        // Use environment variables as primary source
104        if let Ok(qdrant_endpoint) = std::env::var("QDRANT_ENDPOINT") {
105            config.storage.qdrant.endpoint = qdrant_endpoint;
106        }
107
108        if let Ok(ollama_endpoint) = std::env::var("OLLAMA_ENDPOINT") {
109            config.embedding.endpoint = ollama_endpoint;
110        }
111
112        // Override app directory if specified
113        if let Ok(app_dir) = std::env::var("DIRECTORY_INDEXER_DATA_DIR") {
114            let app_dir_path = PathBuf::from(app_dir);
115            config.storage.sqlite_path = app_dir_path.join("data.db");
116        }
117
118        // Handle environment variable override
119        if let Ok(qdrant_collection) = std::env::var("DIRECTORY_INDEXER_QDRANT_COLLECTION") {
120            config.storage.qdrant.collection = qdrant_collection;
121        }
122
123        // If collection name is "test" or "directory-indexer-test", make it unique per process for test isolation
124        if config.storage.qdrant.collection == "test"
125            || config.storage.qdrant.collection == "directory-indexer-test"
126        {
127            config.storage.qdrant.collection = format!(
128                "directory-indexer-test-{}-{}",
129                std::process::id(),
130                std::time::SystemTime::now()
131                    .duration_since(std::time::UNIX_EPOCH)
132                    .unwrap()
133                    .as_nanos()
134            );
135        }
136
137        if let Ok(qdrant_api_key) = std::env::var("QDRANT_API_KEY") {
138            config.storage.qdrant.api_key = Some(qdrant_api_key);
139        }
140
141        if let Ok(ollama_api_key) = std::env::var("OLLAMA_API_KEY") {
142            config.embedding.api_key = Some(ollama_api_key);
143        }
144
145        Ok(config)
146    }
147
148    pub fn save(&self) -> Result<()> {
149        let config_path = Self::default_config_path()?;
150
151        if let Some(parent) = config_path.parent() {
152            std::fs::create_dir_all(parent)?;
153        }
154
155        let json = serde_json::to_string_pretty(self)?;
156        std::fs::write(config_path, json)?;
157
158        Ok(())
159    }
160
161    fn default_app_dir() -> PathBuf {
162        std::env::var("DIRECTORY_INDEXER_DATA_DIR")
163            .map(PathBuf::from)
164            .unwrap_or_else(|_| {
165                dirs::home_dir()
166                    .unwrap_or_else(|| PathBuf::from("."))
167                    .join(".directory-indexer")
168            })
169    }
170
171    fn default_config_path() -> Result<PathBuf> {
172        Ok(Self::default_app_dir().join("config.json"))
173    }
174
175    fn ensure_app_dir_exists(&self) -> Result<()> {
176        if let Some(parent_dir) = self.storage.sqlite_path.parent() {
177            if !parent_dir.exists() {
178                std::fs::create_dir_all(parent_dir)?;
179            }
180        }
181        Ok(())
182    }
183}
184
#[cfg(test)]
mod tests {
    use super::*;
    use std::env;
    use std::ffi::OsString;
    use std::fs;
    use std::sync::{Mutex, MutexGuard};
    use tempfile::TempDir;

    /// Serializes the tests in this module that mutate environment variables.
    /// The environment is process-global and the test harness runs tests on
    /// several threads, so unsynchronised mutation makes results order-dependent.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Holds `ENV_LOCK` and remembers the original values of a set of
    /// environment variables, restoring them on drop, including when the test
    /// panics part-way through.
    struct EnvGuard {
        saved: Vec<(&'static str, Option<OsString>)>,
        // Declared last so it is released only after `Drop::drop` has restored
        // the variables.
        _lock: MutexGuard<'static, ()>,
    }

    impl EnvGuard {
        /// Takes the lock and snapshots `keys`.
        fn acquire(keys: &[&'static str]) -> Self {
            // A poisoned lock only means another test failed while holding it;
            // its guard has already restored the environment, so carry on.
            let lock = ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let saved = keys.iter().map(|&key| (key, env::var_os(key))).collect();
            Self { saved, _lock: lock }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            for (key, original) in &self.saved {
                match original {
                    Some(value) => env::set_var(key, value),
                    None => env::remove_var(key),
                }
            }
        }
    }

    /// Mirrors the collection-name uniquification performed by `Config::load()`.
    fn uniquify_like_load(collection: &str) -> String {
        if collection == "test" || collection == "directory-indexer-test" {
            format!(
                "directory-indexer-test-{}-{}",
                std::process::id(),
                std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .expect("system clock should not be before the Unix epoch")
                    .as_nanos()
            )
        } else {
            collection.to_string()
        }
    }

    #[test]
    fn test_default_config() {
        let config = Config::default();

        assert_eq!(config.storage.qdrant.endpoint, "http://localhost:6333");
        // The collection name depends on whether the cargo variables are set.
        assert!(
            config.storage.qdrant.collection == "directory-indexer-test"
                || config.storage.qdrant.collection == "directory-indexer"
        );
        assert!(config.storage.qdrant.api_key.is_none());

        assert_eq!(config.embedding.provider, "ollama");
        assert_eq!(config.embedding.model, "nomic-embed-text");
        assert_eq!(config.embedding.endpoint, "http://localhost:11434");
        assert!(config.embedding.api_key.is_none());

        assert_eq!(config.indexing.chunk_size, 512);
        assert_eq!(config.indexing.overlap, 50);
        assert_eq!(config.indexing.max_file_size, 10 * 1024 * 1024);
        assert_eq!(config.indexing.concurrency, 4);
        assert!(config
            .indexing
            .ignore_patterns
            .contains(&".git".to_string()));

        assert_eq!(config.monitoring.batch_size, 100);
        assert!(!config.monitoring.file_watching);
    }

    #[test]
    fn test_config_from_environment_variables() {
        // DIRECTORY_INDEXER_QDRANT_COLLECTION is deliberately not touched, to
        // avoid interfering with other tests that load the configuration.
        let _env = EnvGuard::acquire(&[
            "QDRANT_ENDPOINT",
            "OLLAMA_ENDPOINT",
            "QDRANT_API_KEY",
            "OLLAMA_API_KEY",
        ]);

        env::set_var("QDRANT_ENDPOINT", "http://test-qdrant:6333");
        env::set_var("OLLAMA_ENDPOINT", "http://test-ollama:11434");
        env::set_var("QDRANT_API_KEY", "test-qdrant-key");
        env::set_var("OLLAMA_API_KEY", "test-ollama-key");

        let config = Config::load().expect("Config should load successfully");

        // Environment variables override the defaults.
        assert_eq!(config.storage.qdrant.endpoint, "http://test-qdrant:6333");
        assert_eq!(config.embedding.endpoint, "http://test-ollama:11434");
        assert_eq!(
            config.storage.qdrant.api_key,
            Some("test-qdrant-key".to_string())
        );
        assert_eq!(
            config.embedding.api_key,
            Some("test-ollama-key".to_string())
        );

        // Everything else keeps its default.
        assert_eq!(config.embedding.provider, "ollama");
        assert_eq!(config.embedding.model, "nomic-embed-text");
        assert_eq!(config.indexing.chunk_size, 512);

        // `_env` restores the four variables when it goes out of scope.
    }

    #[test]
    fn test_test_collection_name_generation() {
        // Case 1: "test" gets a unique, process-specific name.
        let from_test = uniquify_like_load("test");
        assert_ne!(from_test, "test");
        assert!(from_test.starts_with("directory-indexer-test-"));
        assert!(from_test.contains(&std::process::id().to_string()));

        // Case 2: "directory-indexer-test" gets a unique name as well.
        let from_default = uniquify_like_load("directory-indexer-test");
        assert_ne!(from_default, "directory-indexer-test");
        assert!(from_default.starts_with("directory-indexer-test-"));

        // Case 3: any other name is left alone.
        assert_eq!(
            uniquify_like_load("my-custom-collection"),
            "my-custom-collection"
        );
    }

    #[test]
    fn test_config_save_to_specific_path() {
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let config_path = temp_dir.path().join("test_config.json");

        let config = Config::default();

        if let Some(parent) = config_path.parent() {
            fs::create_dir_all(parent).expect("Failed to create parent directories");
        }

        // Write the config to the test path rather than the real config file.
        let json = serde_json::to_string_pretty(&config).expect("Failed to serialize config");
        fs::write(&config_path, json).expect("Failed to write config file");

        assert!(config_path.exists());

        let content = fs::read_to_string(&config_path).expect("Failed to read config file");
        // Compare against the actual collection name: it is
        // "directory-indexer-test" only when the cargo variables are set.
        assert!(content.contains(config.storage.qdrant.collection.as_str()));
        assert!(content.contains("ollama"));
        assert!(content.contains("nomic-embed-text"));

        // The file must deserialize back to an equivalent config.
        let loaded_config: Config =
            serde_json::from_str(&content).expect("Failed to deserialize config");
        assert_eq!(
            loaded_config.storage.qdrant.collection,
            config.storage.qdrant.collection
        );
        assert_eq!(loaded_config.embedding.provider, config.embedding.provider);
        assert_eq!(
            loaded_config.indexing.chunk_size,
            config.indexing.chunk_size
        );
    }

    #[test]
    fn test_ensure_app_dir_exists() {
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let test_path = temp_dir.path().join("nested").join("dir");

        let mut config = Config::default();
        config.storage.sqlite_path = test_path.join("data.db");

        let result = config.ensure_app_dir_exists();
        assert!(result.is_ok());
        assert!(test_path.exists());
    }

    #[test]
    fn test_default_app_dir_fallback() {
        let _env = EnvGuard::acquire(&["DIRECTORY_INDEXER_DATA_DIR"]);

        env::remove_var("DIRECTORY_INDEXER_DATA_DIR");

        // Without the override the directory is `.directory-indexer`, under
        // either the home directory or the current directory.
        let app_dir = Config::default_app_dir();
        assert!(app_dir.ends_with(".directory-indexer"));

        // `_env` restores the variable when it goes out of scope.
    }

    #[test]
    fn test_config_serialization() {
        let config = Config::default();

        let json = serde_json::to_string(&config).expect("Failed to serialize config");
        assert!(json.contains("directory-indexer"));

        let deserialized: Config =
            serde_json::from_str(&json).expect("Failed to deserialize config");
        assert_eq!(
            deserialized.storage.qdrant.collection,
            config.storage.qdrant.collection
        );
        assert_eq!(deserialized.embedding.provider, config.embedding.provider);
    }
}