Skip to main content

tuitbot_core/config/
validation.rs

1//! Configuration validation logic.
2
3use super::Config;
4use crate::error::ConfigError;
5
6impl Config {
7    /// Validate the minimum configuration required for progressive activation.
8    ///
9    /// Only checks business profile fields and structural requirements.
10    /// Skips LLM API key, X API client_id, and other advanced fields that
11    /// can be configured later via Settings.
12    pub fn validate_minimum(&self) -> Result<(), Vec<ConfigError>> {
13        let mut errors = Vec::new();
14
15        // Business profile — required for tier 1
16        if self.business.product_name.is_empty() {
17            errors.push(ConfigError::MissingField {
18                field: "business.product_name".to_string(),
19            });
20        }
21
22        if self.business.product_description.trim().is_empty() {
23            errors.push(ConfigError::MissingField {
24                field: "business.product_description".to_string(),
25            });
26        }
27
28        if self.business.product_keywords.is_empty() && self.business.competitor_keywords.is_empty()
29        {
30            errors.push(ConfigError::MissingField {
31                field: "business.product_keywords or business.competitor_keywords".to_string(),
32            });
33        }
34
35        // Validate LLM provider value if present (but don't require it)
36        if !self.llm.provider.is_empty() {
37            match self.llm.provider.as_str() {
38                "openai" | "anthropic" | "ollama" | "groq" => {}
39                _ => {
40                    errors.push(ConfigError::InvalidValue {
41                        field: "llm.provider".to_string(),
42                        message: "must be openai, anthropic, ollama, or groq".to_string(),
43                    });
44                }
45            }
46        }
47
48        // Validate provider_backend value if present
49        let backend = self.x_api.provider_backend.as_str();
50        if !backend.is_empty() && backend != "x_api" && backend != "scraper" {
51            errors.push(ConfigError::InvalidValue {
52                field: "x_api.provider_backend".to_string(),
53                message: format!(
54                    "must be 'x_api' or 'scraper', got '{}'",
55                    self.x_api.provider_backend
56                ),
57            });
58        }
59
60        // Structural: db_path
61        let db_path_trimmed = self.storage.db_path.trim();
62        if db_path_trimmed.is_empty() {
63            errors.push(ConfigError::InvalidValue {
64                field: "storage.db_path".to_string(),
65                message: "must not be empty or whitespace-only".to_string(),
66            });
67        } else {
68            let expanded = crate::startup::expand_tilde(db_path_trimmed);
69            if expanded.is_dir() {
70                errors.push(ConfigError::InvalidValue {
71                    field: "storage.db_path".to_string(),
72                    message: format!("'{}' is a directory, must point to a file", db_path_trimmed),
73                });
74            }
75        }
76
77        // Validate content sources against deployment capabilities (if any)
78        for (i, source) in self.content_sources.sources.iter().enumerate() {
79            if !self.deployment_mode.allows_source_type(&source.source_type) {
80                errors.push(ConfigError::InvalidValue {
81                    field: format!("content_sources.sources[{}].source_type", i),
82                    message: format!(
83                        "source type '{}' is not available in {} deployment mode",
84                        source.source_type, self.deployment_mode
85                    ),
86                });
87            }
88        }
89
90        if errors.is_empty() {
91            Ok(())
92        } else {
93            Err(errors)
94        }
95    }
96
97    /// Validate the configuration, returning all errors found (not just the first).
98    pub fn validate(&self) -> Result<(), Vec<ConfigError>> {
99        let mut errors = Vec::new();
100
101        // Validate business profile
102        if self.business.product_name.is_empty() {
103            errors.push(ConfigError::MissingField {
104                field: "business.product_name".to_string(),
105            });
106        }
107
108        if self.business.product_keywords.is_empty() && self.business.competitor_keywords.is_empty()
109        {
110            errors.push(ConfigError::MissingField {
111                field: "business.product_keywords or business.competitor_keywords".to_string(),
112            });
113        }
114
115        if self.business.product_description.trim().is_empty() {
116            errors.push(ConfigError::MissingField {
117                field: "business.product_description".to_string(),
118            });
119        }
120
121        if self.business.industry_topics.is_empty() {
122            errors.push(ConfigError::MissingField {
123                field: "business.industry_topics".to_string(),
124            });
125        }
126
127        // Validate LLM provider
128        if !self.llm.provider.is_empty() {
129            match self.llm.provider.as_str() {
130                "openai" | "anthropic" | "ollama" | "groq" => {}
131                _ => {
132                    errors.push(ConfigError::InvalidValue {
133                        field: "llm.provider".to_string(),
134                        message: "must be openai, anthropic, ollama, or groq".to_string(),
135                    });
136                }
137            }
138
139            if matches!(self.llm.provider.as_str(), "openai" | "anthropic" | "groq") {
140                match &self.llm.api_key {
141                    Some(key) if !key.is_empty() => {}
142                    _ => {
143                        errors.push(ConfigError::MissingField {
144                            field: format!(
145                                "llm.api_key (required for {} provider)",
146                                self.llm.provider
147                            ),
148                        });
149                    }
150                }
151            }
152        }
153
154        // Validate auth mode
155        if !self.auth.mode.is_empty() {
156            match self.auth.mode.as_str() {
157                "manual" | "local_callback" => {}
158                _ => {
159                    errors.push(ConfigError::InvalidValue {
160                        field: "auth.mode".to_string(),
161                        message: "must be manual or local_callback".to_string(),
162                    });
163                }
164            }
165        }
166
167        // Validate scoring threshold
168        if self.scoring.threshold > 100 {
169            errors.push(ConfigError::InvalidValue {
170                field: "scoring.threshold".to_string(),
171                message: "must be between 0 and 100".to_string(),
172            });
173        }
174
175        // Validate limits
176        if self.limits.max_replies_per_day == 0 {
177            errors.push(ConfigError::InvalidValue {
178                field: "limits.max_replies_per_day".to_string(),
179                message: "must be greater than 0".to_string(),
180            });
181        }
182
183        if self.limits.max_tweets_per_day == 0 {
184            errors.push(ConfigError::InvalidValue {
185                field: "limits.max_tweets_per_day".to_string(),
186                message: "must be greater than 0".to_string(),
187            });
188        }
189
190        if self.limits.max_threads_per_week == 0 {
191            errors.push(ConfigError::InvalidValue {
192                field: "limits.max_threads_per_week".to_string(),
193                message: "must be greater than 0".to_string(),
194            });
195        }
196
197        if self.limits.min_action_delay_seconds > self.limits.max_action_delay_seconds {
198            errors.push(ConfigError::InvalidValue {
199                field: "limits.min_action_delay_seconds".to_string(),
200                message: "must be less than or equal to max_action_delay_seconds".to_string(),
201            });
202        }
203
204        // Validate schedule
205        if self.schedule.active_hours_start > 23 {
206            errors.push(ConfigError::InvalidValue {
207                field: "schedule.active_hours_start".to_string(),
208                message: "must be between 0 and 23".to_string(),
209            });
210        }
211        if self.schedule.active_hours_end > 23 {
212            errors.push(ConfigError::InvalidValue {
213                field: "schedule.active_hours_end".to_string(),
214                message: "must be between 0 and 23".to_string(),
215            });
216        }
217        if !self.schedule.timezone.is_empty()
218            && self.schedule.timezone.parse::<chrono_tz::Tz>().is_err()
219        {
220            errors.push(ConfigError::InvalidValue {
221                field: "schedule.timezone".to_string(),
222                message: format!(
223                    "'{}' is not a valid IANA timezone name",
224                    self.schedule.timezone
225                ),
226            });
227        }
228        let valid_days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"];
229        for day in &self.schedule.active_days {
230            if !valid_days.contains(&day.as_str()) {
231                errors.push(ConfigError::InvalidValue {
232                    field: "schedule.active_days".to_string(),
233                    message: format!(
234                        "'{}' is not a valid day abbreviation (use Mon, Tue, Wed, Thu, Fri, Sat, Sun)",
235                        day
236                    ),
237                });
238                break;
239            }
240        }
241
242        // Validate preferred_times
243        for time_str in &self.schedule.preferred_times {
244            if time_str != "auto" && !is_valid_hhmm(time_str) {
245                errors.push(ConfigError::InvalidValue {
246                    field: "schedule.preferred_times".to_string(),
247                    message: format!(
248                        "'{}' is not a valid time (use HH:MM 24h format or \"auto\")",
249                        time_str
250                    ),
251                });
252                break;
253            }
254        }
255
256        // Validate preferred_times_override keys and values
257        for (day, times) in &self.schedule.preferred_times_override {
258            if !valid_days.contains(&day.as_str()) {
259                errors.push(ConfigError::InvalidValue {
260                    field: "schedule.preferred_times_override".to_string(),
261                    message: format!(
262                        "'{}' is not a valid day abbreviation (use Mon, Tue, Wed, Thu, Fri, Sat, Sun)",
263                        day
264                    ),
265                });
266                break;
267            }
268            for time_str in times {
269                if !is_valid_hhmm(time_str) {
270                    errors.push(ConfigError::InvalidValue {
271                        field: "schedule.preferred_times_override".to_string(),
272                        message: format!(
273                            "'{}' is not a valid time for {} (use HH:MM 24h format)",
274                            time_str, day
275                        ),
276                    });
277                    break;
278                }
279            }
280        }
281
282        // Validate MCP policy: tools can't be in both blocked_tools and require_approval_for
283        for tool in &self.mcp_policy.blocked_tools {
284            if self.mcp_policy.require_approval_for.contains(tool) {
285                errors.push(ConfigError::InvalidValue {
286                    field: "mcp_policy.blocked_tools".to_string(),
287                    message: format!(
288                        "tool '{tool}' cannot be in both blocked_tools and require_approval_for"
289                    ),
290                });
291                break;
292            }
293        }
294
295        // Count effective slots per day vs max_tweets_per_day
296        let effective_slots = if self.schedule.preferred_times.is_empty() {
297            0
298        } else {
299            // "auto" expands to 3 slots
300            let base_count: usize = self
301                .schedule
302                .preferred_times
303                .iter()
304                .map(|t| if t == "auto" { 3 } else { 1 })
305                .sum();
306            // Check max across all override days too
307            let max_override = self
308                .schedule
309                .preferred_times_override
310                .values()
311                .map(|v| v.len())
312                .max()
313                .unwrap_or(0);
314            base_count.max(max_override)
315        };
316        if effective_slots > self.limits.max_tweets_per_day as usize {
317            errors.push(ConfigError::InvalidValue {
318                field: "schedule.preferred_times".to_string(),
319                message: format!(
320                    "preferred_times has {} slots but limits.max_tweets_per_day is {} — \
321                     increase the limit or reduce the number of time slots",
322                    effective_slots, self.limits.max_tweets_per_day
323                ),
324            });
325        }
326
327        // Validate thread_preferred_day
328        if let Some(day) = &self.schedule.thread_preferred_day {
329            if !valid_days.contains(&day.as_str()) {
330                errors.push(ConfigError::InvalidValue {
331                    field: "schedule.thread_preferred_day".to_string(),
332                    message: format!(
333                        "'{}' is not a valid day abbreviation (use Mon, Tue, Wed, Thu, Fri, Sat, Sun)",
334                        day
335                    ),
336                });
337            }
338        }
339
340        // Validate thread_preferred_time
341        if !is_valid_hhmm(&self.schedule.thread_preferred_time) {
342            errors.push(ConfigError::InvalidValue {
343                field: "schedule.thread_preferred_time".to_string(),
344                message: format!(
345                    "'{}' is not a valid time (use HH:MM 24h format)",
346                    self.schedule.thread_preferred_time
347                ),
348            });
349        }
350
351        // Validate provider_backend value
352        let backend = self.x_api.provider_backend.as_str();
353        if !backend.is_empty() && backend != "x_api" && backend != "scraper" {
354            errors.push(ConfigError::InvalidValue {
355                field: "x_api.provider_backend".to_string(),
356                message: format!(
357                    "must be 'x_api' or 'scraper', got '{}'",
358                    self.x_api.provider_backend
359                ),
360            });
361        }
362
363        // Reject scraper mode in cloud deployment
364        if self.deployment_mode == super::DeploymentMode::Cloud
365            && self.x_api.provider_backend == "scraper"
366        {
367            errors.push(ConfigError::InvalidValue {
368                field: "x_api.provider_backend".to_string(),
369                message: "Local No-Key Mode is not available in cloud deployment. \
370                          Use the Official X API (provider_backend = \"x_api\")."
371                    .to_string(),
372            });
373        }
374
375        // Require client_id when using official X API backend
376        let is_x_api_backend = backend.is_empty() || backend == "x_api";
377        if is_x_api_backend && self.x_api.client_id.trim().is_empty() {
378            errors.push(ConfigError::MissingField {
379                field: "x_api.client_id".to_string(),
380            });
381        }
382
383        // Validate storage.db_path is not empty/whitespace and not a directory
384        let db_path_trimmed = self.storage.db_path.trim();
385        if db_path_trimmed.is_empty() {
386            errors.push(ConfigError::InvalidValue {
387                field: "storage.db_path".to_string(),
388                message: "must not be empty or whitespace-only".to_string(),
389            });
390        } else {
391            let expanded = crate::startup::expand_tilde(db_path_trimmed);
392            if expanded.is_dir() {
393                errors.push(ConfigError::InvalidValue {
394                    field: "storage.db_path".to_string(),
395                    message: format!("'{}' is a directory, must point to a file", db_path_trimmed),
396                });
397            }
398        }
399
400        // Validate content sources against deployment capabilities
401        for (i, source) in self.content_sources.sources.iter().enumerate() {
402            if !self.deployment_mode.allows_source_type(&source.source_type) {
403                errors.push(ConfigError::InvalidValue {
404                    field: format!("content_sources.sources[{}].source_type", i),
405                    message: format!(
406                        "source type '{}' is not available in {} deployment mode",
407                        source.source_type, self.deployment_mode
408                    ),
409                });
410            }
411
412            // Validate change_detection value.
413            let valid_cd = [
414                super::types::CHANGE_DETECTION_AUTO,
415                super::types::CHANGE_DETECTION_POLL,
416                super::types::CHANGE_DETECTION_NONE,
417            ];
418            if !valid_cd.contains(&source.change_detection.as_str()) {
419                errors.push(ConfigError::InvalidValue {
420                    field: format!("content_sources.sources[{}].change_detection", i),
421                    message: format!(
422                        "must be one of: auto, poll, none — got '{}'",
423                        source.change_detection
424                    ),
425                });
426            }
427
428            // Validate poll_interval_seconds minimum.
429            if let Some(interval) = source.poll_interval_seconds {
430                if interval < super::types::MIN_POLL_INTERVAL_SECONDS {
431                    errors.push(ConfigError::InvalidValue {
432                        field: format!("content_sources.sources[{}].poll_interval_seconds", i),
433                        message: format!(
434                            "must be at least {} seconds, got {}",
435                            super::types::MIN_POLL_INTERVAL_SECONDS,
436                            interval
437                        ),
438                    });
439                }
440            }
441
442            // Validate enabled sources have required fields.
443            if source.is_enabled() {
444                if source.source_type == "local_fs"
445                    && source.path.as_ref().map_or(true, |p| p.is_empty())
446                {
447                    errors.push(ConfigError::MissingField {
448                        field: format!(
449                            "content_sources.sources[{}].path (required for enabled local_fs source)",
450                            i
451                        ),
452                    });
453                }
454                if source.source_type == "google_drive"
455                    && source.folder_id.as_ref().map_or(true, |f| f.is_empty())
456                {
457                    errors.push(ConfigError::MissingField {
458                        field: format!(
459                            "content_sources.sources[{}].folder_id (required for enabled google_drive source)",
460                            i
461                        ),
462                    });
463                }
464            }
465
466            // Warn if both connection_id and service_account_key are set.
467            // Not a blocking error -- session 04 handles precedence.
468            if source.source_type == "google_drive"
469                && source.connection_id.is_some()
470                && source.service_account_key.is_some()
471            {
472                tracing::warn!(
473                    source_index = i,
474                    "content_sources.sources[{}] has both connection_id and \
475                     service_account_key; connection_id takes precedence",
476                    i
477                );
478            }
479
480            // Warn if a google_drive source has neither auth method configured.
481            // The Watchtower will skip this source at runtime, but surface it
482            // during validation so the user knows to connect via the dashboard.
483            if source.source_type == "google_drive"
484                && source.is_enabled()
485                && source.connection_id.is_none()
486                && source.service_account_key.is_none()
487            {
488                tracing::warn!(
489                    source_index = i,
490                    "content_sources.sources[{}] has no authentication configured \
491                     (neither connection_id nor service_account_key); this source \
492                     will be skipped at runtime -- connect via Settings > Content Sources",
493                    i
494                );
495            }
496        }
497
498        if errors.is_empty() {
499            Ok(())
500        } else {
501            Err(errors)
502        }
503    }
504}
505
/// Check if a string is a valid HH:MM time (24h format).
///
/// The input must be exactly two components separated by a single `:`.
/// Each component must be one or two ASCII digits; the hour must be at
/// most 23 and the minute at most 59. Single-digit components such as
/// `"9:05"` are accepted.
///
/// The digit check is explicit because integer parsing alone also accepts
/// a leading `+` sign and any number of leading zeros, so inputs like
/// `"+9:30"` or `"009:030"` would otherwise be treated as valid times.
fn is_valid_hhmm(s: &str) -> bool {
    let Some((hour, minute)) = s.split_once(':') else {
        return false;
    };

    // A component is valid when it is 1–2 ASCII digits whose value is <= `max`.
    let component_ok = |part: &str, max: u8| {
        (1..=2).contains(&part.len())
            && part.bytes().all(|b| b.is_ascii_digit())
            && part.parse::<u8>().map_or(false, |value| value <= max)
    };

    // A second ':' ends up inside `minute` and fails the digit check.
    component_ok(hour, 23) && component_ok(minute, 59)
}
520
#[cfg(test)]
mod tests {
    use super::*;

    /// Default config with just the business fields `validate_minimum` requires.
    fn minimal_valid_config() -> Config {
        let mut config = Config::default();
        config.business.product_name = String::from("TestBot");
        config.business.product_description = String::from("A test product for unit testing");
        config.business.product_keywords = vec![String::from("test")];
        config
    }

    /// True when the debug rendering of any error contains `needle`.
    fn any_error_mentions(errors: &[ConfigError], needle: &str) -> bool {
        errors
            .iter()
            .map(|error| format!("{error:?}"))
            .any(|rendered| rendered.contains(needle))
    }

    // ── validate_minimum ──────────────────────────────────────────────────

    #[test]
    fn validate_minimum_default_config_fails() {
        let outcome = Config::default().validate_minimum();
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_minimum_populated_config_passes() {
        let outcome = minimal_valid_config().validate_minimum();
        assert!(outcome.is_ok(), "{:?}", outcome);
    }

    #[test]
    fn validate_minimum_missing_product_name_fails() {
        let mut config = minimal_valid_config();
        config.business.product_name = String::new();

        let errors = config.validate_minimum().unwrap_err();
        assert!(any_error_mentions(&errors, "product_name"));
    }

    #[test]
    fn validate_minimum_missing_description_fails() {
        let mut config = minimal_valid_config();
        // Whitespace-only descriptions count as missing.
        config.business.product_description = String::from("   ");

        let errors = config.validate_minimum().unwrap_err();
        assert!(any_error_mentions(&errors, "product_description"));
    }

    #[test]
    fn validate_minimum_missing_both_keyword_fields_fails() {
        let mut config = minimal_valid_config();
        config.business.product_keywords = Vec::new();
        config.business.competitor_keywords = Vec::new();

        let errors = config.validate_minimum().unwrap_err();
        assert!(any_error_mentions(&errors, "keywords"));
    }

    #[test]
    fn validate_minimum_competitor_keywords_satisfies_keyword_requirement() {
        let mut config = minimal_valid_config();
        config.business.product_keywords = Vec::new();
        config.business.competitor_keywords = vec![String::from("competitor")];

        let outcome = config.validate_minimum();
        assert!(outcome.is_ok(), "{:?}", outcome);
    }

    #[test]
    fn validate_minimum_invalid_llm_provider_fails() {
        let mut config = minimal_valid_config();
        config.llm.provider = String::from("invalid_provider");

        let errors = config.validate_minimum().unwrap_err();
        assert!(any_error_mentions(&errors, "llm.provider"));
    }

    #[test]
    fn validate_minimum_valid_llm_providers_pass() {
        let providers = ["openai", "anthropic", "ollama", "groq"];
        for provider in providers.iter().copied() {
            let mut config = minimal_valid_config();
            config.llm.provider = provider.to_string();
            assert!(
                config.validate_minimum().is_ok(),
                "provider {provider} should pass"
            );
        }
    }

    #[test]
    fn validate_minimum_invalid_provider_backend_fails() {
        let mut config = minimal_valid_config();
        config.x_api.provider_backend = String::from("invalid_backend");

        let errors = config.validate_minimum().unwrap_err();
        assert!(any_error_mentions(&errors, "provider_backend"));
    }

    #[test]
    fn validate_minimum_valid_provider_backends_pass() {
        let backends = ["x_api", "scraper"];
        for backend in backends.iter().copied() {
            let mut config = minimal_valid_config();
            config.x_api.provider_backend = backend.to_string();
            assert!(
                config.validate_minimum().is_ok(),
                "backend {backend} should pass"
            );
        }
    }

    // ── validate (full) ───────────────────────────────────────────────────

    #[test]
    fn validate_default_config_fails() {
        let outcome = Config::default().validate();
        assert!(outcome.is_err());
    }

    #[test]
    fn validate_collects_multiple_errors() {
        let errors = Config::default().validate().unwrap_err();
        assert!(
            errors.len() >= 2,
            "expected ≥2 errors, got {}",
            errors.len()
        );
    }

    // ── is_valid_hhmm ────────────────────────────────────────────────────

    #[test]
    fn is_valid_hhmm_valid_times() {
        for time in ["00:00", "09:30", "23:59", "12:00"].iter().copied() {
            assert!(is_valid_hhmm(time));
        }
    }

    #[test]
    fn is_valid_hhmm_invalid_times() {
        let rejected = [
            "24:00",    // hour out of range
            "12:60",    // minute out of range
            "noon",     // non-numeric
            "",         // empty
            "12:30:00", // too many parts
            "1230",     // no colon
        ];
        for time in rejected.iter().copied() {
            assert!(!is_valid_hhmm(time));
        }
    }
}