Skip to main content

research_master/config/
file_config.rs

1//! Configuration file support for research-master.
2//!
3//! This module provides TOML configuration file parsing with support
4//! for environment variable overrides.
5//!
6//! # Configuration File Format
7//!
8//! ```toml
9//! [api_keys]
10//! semantic_scholar = "your-api-key"
11//! core = "your-core-api-key"
12//!
13//! [downloads]
14//! default_path = "./downloads"
15//! organize_by_source = true
16//! max_file_size_mb = 100
17//!
18//! [rate_limits]
19//! default_requests_per_second = 5.0
20//! max_concurrent_requests = 10
21//!
22//! [sources]
23//! enabled_sources = "arxiv,semantic,openalex"
24//! disabled_sources = ""
25//!
26//! [[source_rates]]
27//! source = "semantic"
28//! requests_per_second = 0.5
29//!
30//! [[source_rates]]
31//! source = "arxiv"
32//! requests_per_second = 5.0
33//!
34//! [cache]
35//! enabled = true
36//! directory = "~/.cache/research-master"
37//! search_ttl_seconds = 1800
38//! citation_ttl_seconds = 900
39//! max_size_mb = 500
40//!
41//! [proxy]
42//! http = "http://proxy:8080"
43//! https = "https://proxy:8080"
//! no_proxy = "localhost,127.0.0.1"
//!
//! [logging]
//! level = "info"
//! ```
46
47use serde::{Deserialize, Serialize};
48use std::path::PathBuf;
49
50/// Configuration file structure
51#[derive(Debug, Default, Serialize, Deserialize)]
52pub struct ConfigFile {
53    /// API keys section
54    #[serde(default)]
55    pub api_keys: ApiKeysConfig,
56
57    /// Downloads section
58    #[serde(default)]
59    pub downloads: DownloadsConfig,
60
61    /// Rate limits section
62    #[serde(default)]
63    pub rate_limits: RateLimitsConfig,
64
65    /// Sources section
66    #[serde(default)]
67    pub sources: SourcesConfig,
68
69    /// Per-source rate limits
70    #[serde(default)]
71    pub source_rates: Vec<SourceRateConfig>,
72
73    /// Cache section
74    #[serde(default)]
75    pub cache: CacheConfig,
76
77    /// Proxy section
78    #[serde(default)]
79    pub proxy: ProxyConfig,
80
81    /// Logging section
82    #[serde(default)]
83    pub logging: LoggingConfig,
84}
85
86/// API keys configuration
87#[derive(Debug, Default, Serialize, Deserialize)]
88pub struct ApiKeysConfig {
89    #[serde(default)]
90    pub semantic_scholar: Option<String>,
91
92    #[serde(default)]
93    pub core: Option<String>,
94}
95
96/// Downloads configuration
97#[derive(Debug, Default, Serialize, Deserialize)]
98pub struct DownloadsConfig {
99    #[serde(default = "default_download_path")]
100    pub default_path: PathBuf,
101
102    #[serde(default = "default_true")]
103    pub organize_by_source: bool,
104
105    #[serde(default = "default_max_file_size")]
106    pub max_file_size_mb: usize,
107}
108
109fn default_download_path() -> PathBuf {
110    PathBuf::from("./downloads")
111}
112
113fn default_true() -> bool {
114    true
115}
116
117fn default_max_file_size() -> usize {
118    100
119}
120
121/// Rate limits configuration
122#[derive(Debug, Default, Serialize, Deserialize)]
123pub struct RateLimitsConfig {
124    #[serde(default = "default_rps")]
125    pub default_requests_per_second: f32,
126
127    #[serde(default = "default_max_concurrent")]
128    pub max_concurrent_requests: usize,
129}
130
131fn default_rps() -> f32 {
132    5.0
133}
134
135fn default_max_concurrent() -> usize {
136    10
137}
138
139/// Sources configuration
140#[derive(Debug, Default, Serialize, Deserialize)]
141pub struct SourcesConfig {
142    #[serde(default)]
143    pub enabled_sources: Option<String>,
144
145    #[serde(default)]
146    pub disabled_sources: Option<String>,
147}
148
149/// Per-source rate limit configuration
150#[derive(Debug, Clone, Serialize, Deserialize)]
151pub struct SourceRateConfig {
152    pub source: String,
153    pub requests_per_second: f32,
154}
155
156/// Cache configuration
157#[derive(Debug, Default, Serialize, Deserialize)]
158pub struct CacheConfig {
159    #[serde(default)]
160    pub enabled: bool,
161
162    #[serde(default)]
163    pub directory: Option<PathBuf>,
164
165    #[serde(default = "default_search_ttl")]
166    pub search_ttl_seconds: u64,
167
168    #[serde(default = "default_citation_ttl")]
169    pub citation_ttl_seconds: u64,
170
171    #[serde(default = "default_max_cache_size")]
172    pub max_size_mb: usize,
173}
174
175fn default_search_ttl() -> u64 {
176    1800 // 30 minutes
177}
178
179fn default_citation_ttl() -> u64 {
180    900 // 15 minutes
181}
182
183fn default_max_cache_size() -> usize {
184    500
185}
186
187/// Proxy configuration
188#[derive(Debug, Default, Serialize, Deserialize)]
189pub struct ProxyConfig {
190    #[serde(default)]
191    pub http: Option<String>,
192
193    #[serde(default)]
194    pub https: Option<String>,
195
196    #[serde(default)]
197    pub no_proxy: Option<String>,
198}
199
200/// Logging configuration
201#[derive(Debug, Default, Serialize, Deserialize)]
202pub struct LoggingConfig {
203    #[serde(default = "default_log_level")]
204    pub level: String,
205
206    #[serde(default)]
207    pub format: Option<String>,
208}
209
210fn default_log_level() -> String {
211    "info".to_string()
212}
213
214impl ConfigFile {
215    /// Load configuration from a TOML file
216    pub fn load(path: &PathBuf) -> Result<Self, ConfigFileError> {
217        let content =
218            std::fs::read_to_string(path).map_err(|e| ConfigFileError::Io(e.to_string()))?;
219
220        toml::from_str(&content).map_err(|e| ConfigFileError::Parse(e.to_string()))
221    }
222
223    /// Save configuration to a TOML file
224    pub fn save(&self, path: &PathBuf) -> Result<(), ConfigFileError> {
225        let content =
226            toml::to_string_pretty(self).map_err(|e| ConfigFileError::Serialize(e.to_string()))?;
227
228        std::fs::write(path, content).map_err(|e| ConfigFileError::Io(e.to_string()))
229    }
230
231    /// Create default configuration
232    #[allow(clippy::should_implement_trait)]
233    pub fn create_default() -> Self {
234        Self {
235            api_keys: ApiKeysConfig::default(),
236            downloads: DownloadsConfig::default(),
237            rate_limits: RateLimitsConfig::default(),
238            sources: SourcesConfig::default(),
239            source_rates: Vec::new(),
240            cache: CacheConfig::default(),
241            proxy: ProxyConfig::default(),
242            logging: LoggingConfig::default(),
243        }
244    }
245}
246
247/// Configuration file errors
248#[derive(Debug, thiserror::Error)]
249pub enum ConfigFileError {
250    #[error("IO error: {0}")]
251    Io(String),
252
253    #[error("Parse error: {0}")]
254    Parse(String),
255
256    #[error("Serialize error: {0}")]
257    Serialize(String),
258}
259
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Every key set in the fixture must show up in the parsed struct, and
    /// keys left out of a present table must fall back to their defaults.
    #[test]
    fn test_config_file_load() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("config.toml");

        let toml_content = r#"
[api_keys]
semantic_scholar = "test-key"
core = "core-key"

[downloads]
default_path = "/tmp/downloads"
organize_by_source = true
max_file_size_mb = 200

[rate_limits]
default_requests_per_second = 3.0
max_concurrent_requests = 5

[sources]
enabled_sources = "arxiv,semantic"

[[source_rates]]
source = "semantic"
requests_per_second = 0.5

[cache]
enabled = true
max_size_mb = 1000

[logging]
level = "debug"
"#;

        std::fs::write(&path, toml_content).unwrap();

        let config = ConfigFile::load(&path).unwrap();

        // [api_keys]
        assert_eq!(
            config.api_keys.semantic_scholar,
            Some("test-key".to_string())
        );
        assert_eq!(config.api_keys.core, Some("core-key".to_string()));

        // [downloads]
        assert_eq!(
            config.downloads.default_path,
            PathBuf::from("/tmp/downloads")
        );
        assert!(config.downloads.organize_by_source);
        assert_eq!(config.downloads.max_file_size_mb, 200);

        // [rate_limits]
        assert_eq!(config.rate_limits.default_requests_per_second, 3.0);
        assert_eq!(config.rate_limits.max_concurrent_requests, 5);

        // [sources]
        assert_eq!(
            config.sources.enabled_sources,
            Some("arxiv,semantic".to_string())
        );
        assert_eq!(config.sources.disabled_sources, None);

        // [[source_rates]]
        assert_eq!(config.source_rates.len(), 1);
        assert_eq!(config.source_rates[0].source, "semantic");
        assert_eq!(config.source_rates[0].requests_per_second, 0.5);

        // [cache]: keys omitted from a present table use the field defaults.
        assert!(config.cache.enabled);
        assert_eq!(config.cache.max_size_mb, 1000);
        assert_eq!(config.cache.directory, None);
        assert_eq!(config.cache.search_ttl_seconds, 1800);
        assert_eq!(config.cache.citation_ttl_seconds, 900);

        // [proxy] is absent from the fixture entirely.
        assert_eq!(config.proxy.http, None);
        assert_eq!(config.proxy.https, None);
        assert_eq!(config.proxy.no_proxy, None);

        // [logging]
        assert_eq!(config.logging.level, "debug");
        assert_eq!(config.logging.format, None);
    }

    /// Values written by `save` must come back unchanged from `load`.
    #[test]
    fn test_config_file_save_load() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("config.toml");

        let mut config = ConfigFile::default();
        config.api_keys.semantic_scholar = Some("saved-key".to_string());
        config.rate_limits.default_requests_per_second = 2.0;

        config.save(&path).unwrap();

        let loaded = ConfigFile::load(&path).unwrap();
        assert_eq!(
            loaded.api_keys.semantic_scholar,
            Some("saved-key".to_string())
        );
        assert_eq!(loaded.rate_limits.default_requests_per_second, 2.0);

        // Untouched fields must round-trip too, whatever their default is.
        assert_eq!(loaded.api_keys.core, config.api_keys.core);
        assert_eq!(
            loaded.downloads.default_path,
            config.downloads.default_path
        );
        assert_eq!(
            loaded.rate_limits.max_concurrent_requests,
            config.rate_limits.max_concurrent_requests
        );
        assert_eq!(loaded.logging.level, config.logging.level);
        assert_eq!(loaded.source_rates.len(), config.source_rates.len());
    }

    /// A missing file is reported as an I/O error, not a parse error.
    #[test]
    fn test_config_file_nonexistent() {
        // A path inside a fresh temp dir is guaranteed not to exist, unlike a
        // hard-coded absolute path.
        let dir = tempdir().unwrap();
        let missing = dir.path().join("missing.toml");

        let result = ConfigFile::load(&missing);
        assert!(matches!(result, Err(ConfigFileError::Io(_))));
    }

    /// Malformed TOML is reported as a parse error.
    #[test]
    fn test_config_file_invalid_toml() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("invalid.toml");

        std::fs::write(&path, "invalid = toml = content").unwrap();

        let result = ConfigFile::load(&path);
        assert!(matches!(result, Err(ConfigFileError::Parse(_))));
    }
}
360}