Skip to main content

ceres_core/
config.rs

1//! Configuration types for Ceres components.
2//!
3//! # Configuration Improvements
4//!
5//! TODO(config): Make all configuration values environment-configurable
6//! Currently all defaults are hardcoded. Should support:
7//! - `DB_MAX_CONNECTIONS` for database pool size
8//! - `SYNC_CONCURRENCY` for parallel dataset processing
9//! - `HTTP_TIMEOUT` for API request timeout
10//! - `HTTP_MAX_RETRIES` for retry attempts
11//!
12//! Consider using the `config` crate for layered configuration:
13//! defaults -> config file -> environment variables -> CLI args
14
15use serde::{Deserialize, Serialize};
16use std::path::{Path, PathBuf};
17use std::time::Duration;
18
19use crate::circuit_breaker::CircuitBreakerConfig;
20use crate::error::AppError;
21
/// Database connection pool configuration.
///
/// TODO(config): Support environment variable `DB_MAX_CONNECTIONS`.
/// The default of 5 may be insufficient for high-concurrency scenarios.
#[derive(Debug, Clone)]
pub struct DbConfig {
    /// Maximum number of connections for the database pool.
    pub max_connections: u32,
}

impl Default for DbConfig {
    /// Returns the built-in defaults: at most 5 pooled connections.
    fn default() -> Self {
        // TODO(config): Read from DB_MAX_CONNECTIONS env var
        Self { max_connections: 5 }
    }
}
36
/// HTTP client configuration for external API calls.
#[derive(Debug, Clone)]
pub struct HttpConfig {
    /// Timeout applied to an API request.
    pub timeout: Duration,
    /// Number of retry attempts.
    pub max_retries: u32,
    /// Base delay used between retries.
    ///
    /// NOTE(review): presumably scaled by the client's backoff policy —
    /// confirm against the code that consumes this value.
    pub retry_base_delay: Duration,
}

impl Default for HttpConfig {
    /// Returns the built-in defaults: 30 s timeout, 3 retries, 500 ms base delay.
    fn default() -> Self {
        Self {
            timeout: Duration::from_secs(30),
            max_retries: 3,
            retry_base_delay: Duration::from_millis(500),
        }
    }
}
53
54/// Portal synchronization configuration.
55///
56/// TODO(config): Support CLI arg `--concurrency` and env var `SYNC_CONCURRENCY`
57/// Optimal value depends on portal rate limits and system resources.
58/// Consider auto-tuning based on API response times.
59#[derive(Clone)]
60pub struct SyncConfig {
61    /// Number of concurrent dataset processing tasks.
62    pub concurrency: usize,
63    /// Force full sync even if incremental sync is available.
64    pub force_full_sync: bool,
65    /// Circuit breaker configuration for API resilience.
66    pub circuit_breaker: CircuitBreakerConfig,
67}
68
69impl Default for SyncConfig {
70    fn default() -> Self {
71        // TODO(config): Read from SYNC_CONCURRENCY env var
72        Self {
73            concurrency: 10,
74            force_full_sync: false,
75            circuit_breaker: CircuitBreakerConfig::default(),
76        }
77    }
78}
79
80impl SyncConfig {
81    /// Creates a new SyncConfig with force_full_sync enabled.
82    pub fn with_full_sync(mut self) -> Self {
83        self.force_full_sync = true;
84        self
85    }
86
87    /// Creates a new SyncConfig with custom circuit breaker configuration.
88    pub fn with_circuit_breaker(mut self, config: CircuitBreakerConfig) -> Self {
89        self.circuit_breaker = config;
90        self
91    }
92}
93
94// =============================================================================
95// Portal Configuration (portals.toml)
96// =============================================================================
97
/// Serde fallback for a portal's `type` key: returns `"ckan"`.
fn default_portal_type() -> String {
    String::from("ckan")
}
102
/// Serde fallback for a portal's `enabled` key: portals are enabled unless
/// the configuration says otherwise.
fn default_enabled() -> bool {
    true
}
107
108/// Root configuration structure for portals.toml.
109///
110/// This structure represents the entire configuration file containing
111/// an array of portal definitions.
112///
113/// # Example
114///
115/// ```toml
116/// [[portals]]
117/// name = "dati-gov-it"
118/// url = "https://dati.gov.it"
119/// type = "ckan"
120/// description = "Italian national open data portal"
121///
122/// [[portals]]
123/// name = "milano"
124/// url = "https://dati.comune.milano.it"
125/// enabled = true
126/// ```
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct PortalsConfig {
129    /// Array of portal configurations.
130    pub portals: Vec<PortalEntry>,
131}
132
133impl PortalsConfig {
134    /// Returns only enabled portals.
135    ///
136    /// Portals with `enabled = false` are excluded from batch harvesting.
137    pub fn enabled_portals(&self) -> Vec<&PortalEntry> {
138        self.portals.iter().filter(|p| p.enabled).collect()
139    }
140
141    /// Find a portal by name (case-insensitive).
142    ///
143    /// # Arguments
144    /// * `name` - The portal name to search for.
145    ///
146    /// # Returns
147    /// The matching portal entry, or None if not found.
148    pub fn find_by_name(&self, name: &str) -> Option<&PortalEntry> {
149        self.portals
150            .iter()
151            .find(|p| p.name.eq_ignore_ascii_case(name))
152    }
153}
154
155/// A single portal entry in the configuration file.
156///
157/// Each portal entry defines a CKAN portal to harvest, including
158/// its URL, type, and whether it's enabled for batch harvesting.
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct PortalEntry {
161    /// Human-readable portal name.
162    ///
163    /// Used for `--portal <name>` lookup and logging.
164    pub name: String,
165
166    /// Base URL of the CKAN portal.
167    ///
168    /// Example: "<https://dati.comune.milano.it>"
169    pub url: String,
170
171    /// Portal type: "ckan", "socrata", or "dcat".
172    ///
173    /// Defaults to "ckan" if not specified.
174    #[serde(rename = "type", default = "default_portal_type")]
175    pub portal_type: String,
176
177    /// Whether this portal is enabled for batch harvesting.
178    ///
179    /// Defaults to `true` if not specified.
180    #[serde(default = "default_enabled")]
181    pub enabled: bool,
182
183    /// Optional description of the portal.
184    pub description: Option<String>,
185}
186
/// File name of the portal configuration inside the configuration directory.
pub const CONFIG_FILE_NAME: &str = "portals.toml";
189
190/// Returns the default configuration directory path.
191///
192/// Uses XDG Base Directory specification: `~/.config/ceres/`
193pub fn default_config_dir() -> Option<PathBuf> {
194    dirs::config_dir().map(|p| p.join("ceres"))
195}
196
197/// Returns the default configuration file path.
198///
199/// Path: `~/.config/ceres/portals.toml`
200pub fn default_config_path() -> Option<PathBuf> {
201    default_config_dir().map(|p| p.join(CONFIG_FILE_NAME))
202}
203
/// Contents written to a freshly created `portals.toml`.
///
/// Ships with two Italian open data portals already configured so that
/// `ceres harvest` works out of the box, without manual editing.
const DEFAULT_CONFIG_TEMPLATE: &str = r#"# Ceres Portal Configuration
#
# Usage:
#   ceres harvest                 # Harvest all enabled portals
#   ceres harvest --portal milano # Harvest specific portal by name
#   ceres harvest https://...     # Harvest single URL (ignores this file)
#
# Set enabled = false to skip a portal during batch harvest.

# City of Milan open data
[[portals]]
name = "milano"
url = "https://dati.comune.milano.it"
type = "ckan"
description = "Open data del Comune di Milano"

# Sicily Region open data
[[portals]]
name = "sicilia"
url = "https://dati.regione.sicilia.it"
type = "ckan"
description = "Open data della Regione Siciliana"
"#;
231
232/// Load portal configuration from a TOML file.
233///
234/// # Arguments
235/// * `path` - Optional custom path. If `None`, uses default XDG path.
236///
237/// # Returns
238/// * `Ok(Some(config))` - Configuration loaded successfully
239/// * `Ok(None)` - No configuration file found (not an error for backward compatibility)
240/// * `Err(e)` - Configuration file exists but is invalid
241///
242/// # Behavior
243/// If no configuration file exists at the default path, a template file
244/// is automatically created to help users get started.
245pub fn load_portals_config(path: Option<PathBuf>) -> Result<Option<PortalsConfig>, AppError> {
246    let using_default_path = path.is_none();
247    let config_path = match path {
248        Some(p) => p,
249        None => match default_config_path() {
250            Some(p) => p,
251            None => return Ok(None),
252        },
253    };
254
255    if !config_path.exists() {
256        // Auto-create template if using default path
257        if using_default_path {
258            match create_default_config(&config_path) {
259                Ok(()) => {
260                    // Template created successfully - read it and return the config
261                    // This allows the user to immediately harvest without re-running
262                    tracing::info!(
263                        "Config file created at {}. Starting harvest with default portals...",
264                        config_path.display()
265                    );
266                    // Continue to read the newly created file below
267                }
268                Err(e) => {
269                    // Log warning but don't fail - user might not have write permissions
270                    tracing::warn!("Could not create default config template: {}", e);
271                    return Ok(None);
272                }
273            }
274        } else {
275            // Custom path specified but doesn't exist - that's an error
276            return Err(AppError::ConfigError(format!(
277                "Config file not found: {}",
278                config_path.display()
279            )));
280        }
281    }
282
283    let content = std::fs::read_to_string(&config_path).map_err(|e| {
284        AppError::ConfigError(format!(
285            "Failed to read config file '{}': {}",
286            config_path.display(),
287            e
288        ))
289    })?;
290
291    let config: PortalsConfig = toml::from_str(&content).map_err(|e| {
292        AppError::ConfigError(format!(
293            "Invalid TOML in '{}': {}",
294            config_path.display(),
295            e
296        ))
297    })?;
298
299    Ok(Some(config))
300}
301
302/// Create a default configuration file with a template.
303///
304/// Creates the parent directory if it doesn't exist.
305///
306/// # Arguments
307/// * `path` - The path where the config file should be created.
308fn create_default_config(path: &Path) -> std::io::Result<()> {
309    // Create parent directory if needed
310    if let Some(parent) = path.parent() {
311        std::fs::create_dir_all(parent)?;
312    }
313
314    std::fs::write(path, DEFAULT_CONFIG_TEMPLATE)?;
315    tracing::info!("Created default config template at: {}", path.display());
316
317    Ok(())
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `contents` (followed by a newline) to a fresh temporary file.
    ///
    /// Keep the returned handle alive for as long as the path is in use.
    fn write_temp_config(contents: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        writeln!(file, "{}", contents).unwrap();
        file
    }

    // =========================================================================
    // Component Configuration Defaults
    // =========================================================================

    #[test]
    fn test_db_config_defaults() {
        let config = DbConfig::default();
        assert_eq!(config.max_connections, 5);
    }

    #[test]
    fn test_http_config_defaults() {
        let config = HttpConfig::default();
        assert_eq!(config.timeout, Duration::from_secs(30));
        assert_eq!(config.max_retries, 3);
        assert_eq!(config.retry_base_delay, Duration::from_millis(500));
    }

    #[test]
    fn test_sync_config_defaults() {
        let config = SyncConfig::default();
        assert_eq!(config.concurrency, 10);
        assert!(!config.force_full_sync);
    }

    #[test]
    fn test_sync_config_with_full_sync() {
        let config = SyncConfig::default().with_full_sync();
        assert!(config.force_full_sync);
        // The builder must not disturb unrelated settings.
        assert_eq!(config.concurrency, 10);
    }

    // =========================================================================
    // Portal Configuration Tests
    // =========================================================================

    #[test]
    fn test_portals_config_deserialize() {
        let source = r#"
[[portals]]
name = "test-portal"
url = "https://example.com"
type = "ckan"
"#;
        let config: PortalsConfig = toml::from_str(source).unwrap();
        assert_eq!(config.portals.len(), 1);
        assert_eq!(config.portals[0].name, "test-portal");
        assert_eq!(config.portals[0].url, "https://example.com");
        assert_eq!(config.portals[0].portal_type, "ckan");
        assert!(config.portals[0].enabled); // default
        assert!(config.portals[0].description.is_none());
    }

    #[test]
    fn test_portals_config_defaults() {
        let source = r#"
[[portals]]
name = "minimal"
url = "https://example.com"
"#;
        let config: PortalsConfig = toml::from_str(source).unwrap();
        assert_eq!(config.portals[0].portal_type, "ckan"); // default type
        assert!(config.portals[0].enabled); // default enabled
    }

    #[test]
    fn test_portals_config_enabled_filter() {
        let source = r#"
[[portals]]
name = "enabled-portal"
url = "https://a.com"

[[portals]]
name = "disabled-portal"
url = "https://b.com"
enabled = false
"#;
        let config: PortalsConfig = toml::from_str(source).unwrap();
        let enabled = config.enabled_portals();
        assert_eq!(enabled.len(), 1);
        assert_eq!(enabled[0].name, "enabled-portal");
    }

    #[test]
    fn test_portals_config_find_by_name() {
        let source = r#"
[[portals]]
name = "Milano"
url = "https://dati.comune.milano.it"
"#;
        let config: PortalsConfig = toml::from_str(source).unwrap();

        // Case-insensitive search
        assert!(config.find_by_name("milano").is_some());
        assert!(config.find_by_name("MILANO").is_some());
        assert!(config.find_by_name("Milano").is_some());

        // Not found
        assert!(config.find_by_name("roma").is_none());
    }

    #[test]
    fn test_portals_config_with_description() {
        let source = r#"
[[portals]]
name = "test"
url = "https://example.com"
description = "A test portal"
"#;
        let config: PortalsConfig = toml::from_str(source).unwrap();
        assert_eq!(
            config.portals[0].description,
            Some("A test portal".to_string())
        );
    }

    #[test]
    fn test_portals_config_multiple_portals() {
        let source = r#"
[[portals]]
name = "portal-1"
url = "https://a.com"

[[portals]]
name = "portal-2"
url = "https://b.com"

[[portals]]
name = "portal-3"
url = "https://c.com"
enabled = false
"#;
        let config: PortalsConfig = toml::from_str(source).unwrap();
        assert_eq!(config.portals.len(), 3);
        assert_eq!(config.enabled_portals().len(), 2);
    }

    #[test]
    fn test_default_config_path() {
        // This test just verifies the function doesn't panic
        // Actual path depends on the platform
        let path = default_config_path();
        if let Some(p) = path {
            assert!(p.ends_with("portals.toml"));
        }
    }

    // =========================================================================
    // Default template tests
    // =========================================================================

    #[test]
    fn test_default_template_parses() {
        // load_portals_config() reads the template back right after writing
        // it, so the template itself must always be a valid configuration.
        let config: PortalsConfig = toml::from_str(DEFAULT_CONFIG_TEMPLATE).unwrap();
        assert_eq!(config.portals.len(), 2);
        assert_eq!(config.enabled_portals().len(), 2);
        assert!(config.find_by_name("milano").is_some());
        assert!(config.find_by_name("sicilia").is_some());
    }

    #[test]
    fn test_create_default_config_creates_parent_and_file() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("nested").join(CONFIG_FILE_NAME);
        assert!(!path.exists());

        create_default_config(&path).unwrap();

        assert_eq!(
            std::fs::read_to_string(&path).unwrap(),
            DEFAULT_CONFIG_TEMPLATE
        );
        let config = load_portals_config(Some(path)).unwrap().unwrap();
        assert_eq!(config.portals.len(), 2);
    }

    // =========================================================================
    // load_portals_config() tests with real files
    // =========================================================================

    #[test]
    fn test_load_portals_config_valid_file() {
        let file = write_temp_config(
            r#"
[[portals]]
name = "test"
url = "https://test.com"
"#,
        );

        let config = load_portals_config(Some(file.path().to_path_buf()))
            .unwrap()
            .unwrap();

        assert_eq!(config.portals.len(), 1);
        assert_eq!(config.portals[0].name, "test");
        assert_eq!(config.portals[0].url, "https://test.com");
    }

    #[test]
    fn test_load_portals_config_custom_path_not_found() {
        let result = load_portals_config(Some("/nonexistent/path/to/config.toml".into()));
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, AppError::ConfigError(_)));
    }

    #[test]
    fn test_load_portals_config_invalid_toml() {
        let file = write_temp_config("this is not valid toml {{");

        let result = load_portals_config(Some(file.path().to_path_buf()));
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err, AppError::ConfigError(_)));
    }

    #[test]
    fn test_load_portals_config_multiple_portals_with_enabled_filter() {
        let file = write_temp_config(
            r#"
[[portals]]
name = "enabled-portal"
url = "https://a.com"

[[portals]]
name = "disabled-portal"
url = "https://b.com"
enabled = false

[[portals]]
name = "another-enabled"
url = "https://c.com"
enabled = true
"#,
        );

        let config = load_portals_config(Some(file.path().to_path_buf()))
            .unwrap()
            .unwrap();

        assert_eq!(config.portals.len(), 3);
        assert_eq!(config.enabled_portals().len(), 2);
    }

    #[test]
    fn test_load_portals_config_with_all_fields() {
        let file = write_temp_config(
            r#"
[[portals]]
name = "full-config"
url = "https://example.com"
type = "ckan"
enabled = true
description = "A fully configured portal"
"#,
        );

        let config = load_portals_config(Some(file.path().to_path_buf()))
            .unwrap()
            .unwrap();

        let portal = &config.portals[0];
        assert_eq!(portal.name, "full-config");
        assert_eq!(portal.url, "https://example.com");
        assert_eq!(portal.portal_type, "ckan");
        assert!(portal.enabled);
        assert_eq!(
            portal.description,
            Some("A fully configured portal".to_string())
        );
    }

    #[test]
    fn test_load_portals_config_empty_portals_array() {
        let file = write_temp_config("portals = []");

        let config = load_portals_config(Some(file.path().to_path_buf()))
            .unwrap()
            .unwrap();

        assert!(config.portals.is_empty());
        assert!(config.enabled_portals().is_empty());
    }
}