Skip to main content

subx_cli/config/
validator.rs

1//! High-level configuration validation for configuration sections.
2//!
3//! This module provides validation for complete configuration sections and
4//! the entire configuration structure. It builds upon the low-level validation
5//! functions from the [`crate::config::validation`] module.
6//!
7//! # Architecture
8//!
9//! - [`crate::config::validation`] - Low-level validation functions for individual values
10//! - [`crate::config::validator`] (this module) - High-level configuration section validators
11//! - [`crate::config::field_validator`] - Key-value validation for configuration service
12
13use super::validation::*;
14use crate::Result;
15use crate::config::Config;
16use crate::config::{
17    AIConfig, FormatsConfig, GeneralConfig, ParallelConfig, SyncConfig, TranslationConfig,
18    VadConfig,
19};
20use crate::error::SubXError;
21
22/// Validate the complete configuration.
23///
24/// This function validates all configuration sections and their
25/// interdependencies.
26///
27/// # Arguments
28/// * `config` - The configuration to validate
29///
30/// # Errors
31/// Returns the first validation error encountered.
32pub fn validate_config(config: &Config) -> Result<()> {
33    validate_ai_config(&config.ai)?;
34    validate_sync_config(&config.sync)?;
35    validate_general_config(&config.general)?;
36    validate_formats_config(&config.formats)?;
37    validate_parallel_config(&config.parallel)?;
38    validate_translation_config(&config.translation)?;
39
40    // Cross-section validation
41    validate_config_consistency(config)?;
42
43    Ok(())
44}
45
46/// Validate AI configuration section.
47pub fn validate_ai_config(ai_config: &AIConfig) -> Result<()> {
48    validate_non_empty_string(&ai_config.provider, "AI provider")?;
49
50    // Validation arms key off the canonical provider value so callers may
51    // pass either `local` or the alias `ollama` without divergence.
52    let canonical = crate::config::field_validator::normalize_ai_provider(&ai_config.provider);
53
54    // Validate provider-specific settings
55    match canonical.as_str() {
56        "openai" => {
57            if let Some(api_key) = &ai_config.api_key {
58                if !api_key.is_empty() {
59                    validate_api_key(api_key)?;
60                    if !api_key.starts_with("sk-") {
61                        return Err(SubXError::config("OpenAI API key must start with 'sk-'"));
62                    }
63                }
64            }
65            validate_ai_model(&ai_config.model)?;
66            validate_temperature(ai_config.temperature)?;
67            validate_positive_number(ai_config.max_tokens as f64)?;
68
69            if !ai_config.base_url.is_empty() {
70                validate_url_format(&ai_config.base_url)?;
71                require_https_for_hosted_provider(&ai_config.base_url, "openai")?;
72            }
73        }
74        "openrouter" => {
75            if let Some(api_key) = &ai_config.api_key {
76                if !api_key.is_empty() {
77                    validate_api_key(api_key)?;
78                    // OpenRouter API keys have no specific prefix requirement
79                }
80            }
81            validate_ai_model(&ai_config.model)?;
82            validate_temperature(ai_config.temperature)?;
83            validate_positive_number(ai_config.max_tokens as f64)?;
84
85            if !ai_config.base_url.is_empty() {
86                validate_url_format(&ai_config.base_url)?;
87                require_https_for_hosted_provider(&ai_config.base_url, "openrouter")?;
88            }
89        }
90        "anthropic" => {
91            if let Some(api_key) = &ai_config.api_key {
92                if !api_key.is_empty() {
93                    validate_api_key(api_key)?;
94                }
95            }
96            validate_ai_model(&ai_config.model)?;
97            validate_temperature(ai_config.temperature)?;
98        }
99        "azure-openai" => {
100            if let Some(api_key) = &ai_config.api_key {
101                if !api_key.is_empty() {
102                    validate_api_key(api_key)?;
103                }
104            }
105            validate_ai_model(&ai_config.model)?;
106            validate_temperature(ai_config.temperature)?;
107            validate_positive_number(ai_config.max_tokens as f64)?;
108            if let Some(ver) = &ai_config.api_version {
109                if ver.trim().is_empty() {
110                    return Err(SubXError::config(
111                        "Azure OpenAI api_version must not be empty",
112                    ));
113                }
114            }
115            if !ai_config.base_url.is_empty() {
116                validate_url_format(&ai_config.base_url)?;
117                require_https_for_hosted_provider(&ai_config.base_url, "azure-openai")?;
118            }
119        }
120        "local" => {
121            // `api_key` is optional for the local provider. When supplied,
122            // it is run through the same permissive `validate_api_key`
123            // helper used by other providers (no prefix requirement).
124            if let Some(api_key) = &ai_config.api_key {
125                if !api_key.is_empty() {
126                    validate_api_key(api_key)?;
127                }
128            }
129            // `base_url` is required for the local provider — there is no
130            // safe default because every runtime listens on a different
131            // port. Both http:// and https:// are accepted; the local
132            // provider is endpoint-agnostic and may target loopback, LAN,
133            // VPN, or public hosts.
134            if ai_config.base_url.trim().is_empty() {
135                return Err(SubXError::config(
136                    "ai.base_url is required when ai.provider is `local` \
137                     (e.g. http://localhost:11434/v1 for Ollama, \
138                     http://localhost:1234/v1 for LM Studio, \
139                     http://localhost:8080/v1 for llama.cpp's llama-server)",
140                ));
141            }
142            validate_url_format(&ai_config.base_url)?;
143            validate_ai_model(&ai_config.model)?;
144            validate_temperature(ai_config.temperature)?;
145            validate_positive_number(ai_config.max_tokens as f64)?;
146        }
147        _ => {
148            return Err(SubXError::config(format!(
149                "Unsupported AI provider: {}. Supported providers: openai, openrouter, anthropic, azure-openai, local",
150                ai_config.provider
151            )));
152        }
153    }
154
155    // Validate retry settings
156    validate_positive_number(ai_config.retry_attempts as f64)?;
157    if ai_config.retry_attempts > 10 {
158        return Err(SubXError::config("Retry count cannot exceed 10 times"));
159    }
160
161    // Validate timeout settings
162    validate_range(ai_config.request_timeout_seconds as f64, 10.0, 600.0)
163        .map_err(|_| SubXError::config("Request timeout must be between 10 and 600 seconds"))?;
164
165    Ok(())
166}
167
168/// Reject any user-set `ai.base_url` whose scheme is not `https://` for
169/// hosted providers (`openai`, `openrouter`, `azure-openai`).
170///
171/// The error message names the field, the unsupported scheme, states that
172/// HTTPS is required for hosted providers, and appends the canonical
173/// `local_provider_hint()` so users who actually wanted to call an
174/// OpenAI-compatible local or LAN endpoint get a one-line fix.
175fn require_https_for_hosted_provider(base_url: &str, provider: &str) -> Result<()> {
176    let parsed = url::Url::parse(base_url)
177        .map_err(|_| SubXError::config(format!("Invalid URL format: {base_url}")))?;
178    let scheme = parsed.scheme();
179    if scheme != "https" {
180        return Err(SubXError::config(format!(
181            "ai.base_url uses unsupported scheme `{scheme}://` for hosted provider `{provider}`; \
182             hosted providers require HTTPS. {}",
183            crate::services::ai::local_provider_hint(),
184        )));
185    }
186    Ok(())
187}
188
189/// Validate sync configuration section.
190pub fn validate_sync_config(sync_config: &SyncConfig) -> Result<()> {
191    // Delegate to SyncConfig's validation with enhancements
192    sync_config.validate()
193}
194
195/// Validate general configuration section.
196pub fn validate_general_config(general_config: &GeneralConfig) -> Result<()> {
197    // Validate concurrent jobs
198    validate_positive_number(general_config.max_concurrent_jobs as f64)?;
199    if general_config.max_concurrent_jobs > 64 {
200        return Err(SubXError::config(
201            "Maximum concurrent jobs should not exceed 64",
202        ));
203    }
204
205    // Validate timeout settings
206    validate_range(general_config.task_timeout_seconds as f64, 30.0, 3600.0)
207        .map_err(|_| SubXError::config("Task timeout must be between 30 and 3600 seconds"))?;
208
209    validate_range(
210        general_config.worker_idle_timeout_seconds as f64,
211        10.0,
212        3600.0,
213    )
214    .map_err(|_| SubXError::config("Worker idle timeout must be between 10 and 3600 seconds"))?;
215
216    Ok(())
217}
218
219/// Validate formats configuration section.
220pub fn validate_formats_config(formats_config: &FormatsConfig) -> Result<()> {
221    // Check default output format
222    validate_non_empty_string(&formats_config.default_output, "Default output format")?;
223    validate_enum(
224        &formats_config.default_output,
225        &["srt", "ass", "vtt", "webvtt"],
226    )?;
227
228    // Check default encoding
229    validate_non_empty_string(&formats_config.default_encoding, "Default encoding")?;
230    validate_enum(
231        &formats_config.default_encoding,
232        &["utf-8", "gbk", "big5", "shift_jis"],
233    )?;
234
235    // Check encoding detection confidence
236    validate_range(formats_config.encoding_detection_confidence, 0.0, 1.0).map_err(|_| {
237        SubXError::config("Encoding detection confidence must be between 0.0 and 1.0")
238    })?;
239
240    Ok(())
241}
242
243/// Validate parallel processing configuration.
244pub fn validate_parallel_config(parallel_config: &ParallelConfig) -> Result<()> {
245    // Check max workers
246    validate_positive_number(parallel_config.max_workers as f64)?;
247    if parallel_config.max_workers > 64 {
248        return Err(SubXError::config("Maximum workers should not exceed 64"));
249    }
250
251    // Check task queue size
252    validate_positive_number(parallel_config.task_queue_size as f64)?;
253    if parallel_config.task_queue_size < 100 {
254        return Err(SubXError::config("Task queue size should be at least 100"));
255    }
256
257    Ok(())
258}
259
260/// Validate translation configuration section.
261///
262/// Ensures `batch_size` is a positive integer and that
263/// `default_target_language`, when set, is a non-empty trimmed string.
264pub fn validate_translation_config(translation_config: &TranslationConfig) -> Result<()> {
265    if translation_config.batch_size == 0 {
266        return Err(SubXError::config(
267            "translation.batch_size must be greater than zero",
268        ));
269    }
270    if translation_config.batch_size > 1000 {
271        return Err(SubXError::config(
272            "translation.batch_size should not exceed 1000",
273        ));
274    }
275    if let Some(lang) = &translation_config.default_target_language {
276        if lang.trim().is_empty() {
277            return Err(SubXError::config(
278                "translation.default_target_language must not be empty",
279            ));
280        }
281    }
282    Ok(())
283}
284
285/// Validate configuration consistency across sections.
286fn validate_config_consistency(config: &Config) -> Result<()> {
287    // Example: Ensure AI is properly configured if using AI features
288    if config.ai.provider == "openai" {
289        if let Some(api_key) = &config.ai.api_key {
290            if api_key.is_empty() {
291                return Err(SubXError::config(
292                    "OpenAI provider is selected but API key is empty",
293                ));
294            }
295        }
296        // Note: We don't require API key for default config to allow basic operation
297    }
298
299    // Ensure reasonable resource allocation
300    if config.parallel.max_workers > config.general.max_concurrent_jobs {
301        log::warn!(
302            "Parallel max_workers ({}) exceeds general max_concurrent_jobs ({})",
303            config.parallel.max_workers,
304            config.general.max_concurrent_jobs
305        );
306    }
307
308    Ok(())
309}
310
311impl SyncConfig {
312    /// Validate the sync configuration for correctness.
313    ///
314    /// Checks all sync-related configuration parameters to ensure they
315    /// are within valid ranges and have acceptable values.
316    ///
317    /// # Returns
318    ///
319    /// Returns `Ok(())` if validation passes, or an error describing
320    /// the validation failure.
321    ///
322    /// # Errors
323    ///
324    /// This function returns an error if:
325    /// - `default_method` is not one of the supported methods
326    /// - `max_offset_seconds` is outside the valid range
327    /// - VAD configuration validation fails
328    pub fn validate(&self) -> Result<()> {
329        // Validate default_method
330        validate_enum(&self.default_method, &["vad", "auto", "manual"])?;
331
332        // Validate max_offset_seconds
333        validate_positive_number(self.max_offset_seconds)?;
334        if self.max_offset_seconds > 3600.0 {
335            return Err(SubXError::config(
336                "sync.max_offset_seconds should not exceed 3600 seconds (1 hour). If a larger value is needed, please verify the sync requirements are reasonable.",
337            ));
338        }
339
340        // Provide recommendations for common use cases
341        if self.max_offset_seconds < 5.0 {
342            log::warn!(
343                "sync.max_offset_seconds is set to {:.1}s which may be too small. Consider using 30.0-60.0 seconds.",
344                self.max_offset_seconds
345            );
346        } else if self.max_offset_seconds > 600.0 && self.max_offset_seconds <= 3600.0 {
347            log::warn!(
348                "sync.max_offset_seconds is set to {:.1}s which is quite large. Please confirm this meets your requirements.",
349                self.max_offset_seconds
350            );
351        }
352
353        // Validate sub-configurations
354        self.vad.validate()?;
355
356        Ok(())
357    }
358}
359
360impl VadConfig {
361    /// Validate the local VAD configuration for correctness.
362    ///
363    /// Ensures that all VAD-related parameters are within acceptable
364    /// ranges and have valid values for audio processing.
365    ///
366    /// # Returns
367    ///
368    /// Returns `Ok(())` if validation passes, or an error describing
369    /// the validation failure.
370    ///
371    /// # Errors
372    ///
373    /// This function returns an error if:
374    /// - `sensitivity` is outside the valid range (0.0-1.0)
375    pub fn validate(&self) -> Result<()> {
376        // Validate sensitivity range
377        if !(0.0..=1.0).contains(&self.sensitivity) {
378            return Err(SubXError::config(
379                "VAD sensitivity must be between 0.0 and 1.0",
380            ));
381        }
382        // Validate padding_chunks
383        if self.padding_chunks > 10 {
384            return Err(SubXError::config("VAD padding_chunks must not exceed 10"));
385        }
386        // Validate minimum speech duration
387        if self.min_speech_duration_ms > 5000 {
388            return Err(SubXError::config(
389                "VAD min_speech_duration_ms must not exceed 5000ms",
390            ));
391        }
392        Ok(())
393    }
394}
395
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{AIConfig, Config, SyncConfig, VadConfig};

    /// Build a `local` provider configuration without an API key.
    fn keyless_local(base_url: &str, model: &str) -> AIConfig {
        AIConfig {
            provider: "local".to_string(),
            api_key: None,
            base_url: base_url.to_string(),
            model: model.to_string(),
            ..Default::default()
        }
    }

    /// Validate `cfg`, expecting a failure, and return the rendered error.
    fn rejection_message(cfg: &AIConfig) -> String {
        validate_ai_config(cfg).unwrap_err().to_string()
    }

    #[test]
    fn test_validate_default_config() {
        assert!(validate_config(&Config::default()).is_ok());
    }

    #[test]
    fn test_validate_ai_config_valid() {
        let openai = AIConfig {
            provider: "openai".to_string(),
            api_key: Some("sk-test123456789".to_string()),
            temperature: 0.8,
            ..Default::default()
        };
        assert!(validate_ai_config(&openai).is_ok());

        let openrouter = AIConfig {
            provider: "openrouter".to_string(),
            api_key: Some("test-openrouter-key".to_string()),
            model: "deepseek/deepseek-r1-0528:free".to_string(),
            ..Default::default()
        };
        assert!(validate_ai_config(&openrouter).is_ok());

        let azure = AIConfig {
            provider: "azure-openai".to_string(),
            api_key: Some("azure-key-123".to_string()),
            model: "dep123".to_string(),
            api_version: Some("2025-04-01-preview".to_string()),
            ..Default::default()
        };
        assert!(validate_ai_config(&azure).is_ok());
    }

    #[test]
    fn test_validate_ai_config_invalid_provider() {
        let msg = rejection_message(&AIConfig {
            provider: "invalid".to_string(),
            ..Default::default()
        });
        assert!(msg.contains(
            "Unsupported AI provider: invalid. Supported providers: openai, openrouter, anthropic, azure-openai, local"
        ));
    }

    // ── local provider arm ───────────────────────────────────────────────────

    #[test]
    fn test_validate_ai_config_local_without_api_key() {
        let cfg = keyless_local("http://localhost:11434/v1", "llama3.1:8b-instruct");
        assert!(validate_ai_config(&cfg).is_ok());
    }

    #[test]
    fn test_validate_ai_config_local_with_empty_api_key() {
        let mut cfg = keyless_local("http://localhost:11434/v1", "llama3.1:8b-instruct");
        cfg.api_key = Some(String::new());
        assert!(validate_ai_config(&cfg).is_ok());
    }

    #[test]
    fn test_validate_ai_config_local_rejects_empty_base_url() {
        let msg = rejection_message(&keyless_local("", "llama3.1"));
        assert!(
            msg.contains("ai.base_url"),
            "error must name ai.base_url field, got: {msg}"
        );
        assert!(
            msg.contains("local"),
            "error must mention `local` provider, got: {msg}"
        );
    }

    #[test]
    fn test_validate_ai_config_local_accepts_lan_http_base_url() {
        let cfg = keyless_local("http://192.168.50.50:11434/v1", "llama3.1");
        assert!(validate_ai_config(&cfg).is_ok());
    }

    #[test]
    fn test_validate_ai_config_local_accepts_https_tailnet_base_url() {
        let cfg = keyless_local("https://ollama.tailnet.ts.net/v1", "qwen2.5:7b");
        assert!(validate_ai_config(&cfg).is_ok());
    }

    #[test]
    fn test_validate_ai_config_local_accepts_ollama_alias_via_normalize() {
        // Validation dispatches on the canonical provider, so the `ollama`
        // alias (as it would arrive from `SUBX_AI_PROVIDER=ollama` before
        // service.rs canonicalizes it) must behave exactly like `local`.
        let mut cfg = keyless_local("http://localhost:11434/v1", "llama3.1");
        cfg.provider = "ollama".to_string();
        assert!(validate_ai_config(&cfg).is_ok());
    }

    // ── §1.9: hosted providers require HTTPS for user-set base_url ───────────

    #[test]
    fn test_validate_ai_config_openai_rejects_http_base_url() {
        let msg = rejection_message(&AIConfig {
            provider: "openai".to_string(),
            api_key: Some("sk-test1234567890".to_string()),
            base_url: "http://localhost:11434/v1".to_string(),
            ..Default::default()
        });
        // The trailing hint must mention both `local` and `ollama`.
        for needle in &["ai.base_url", "http", "HTTPS", "local", "ollama"] {
            assert!(msg.contains(*needle), "msg={msg}");
        }
    }

    #[test]
    fn test_validate_ai_config_openrouter_rejects_http_base_url() {
        let msg = rejection_message(&AIConfig {
            provider: "openrouter".to_string(),
            api_key: Some("test-openrouter-key".to_string()),
            base_url: "http://x.example.com/v1".to_string(),
            model: "deepseek/deepseek-r1-0528:free".to_string(),
            ..Default::default()
        });
        for needle in &["ai.base_url", "HTTPS", "local", "ollama"] {
            assert!(msg.contains(*needle), "msg={msg}");
        }
    }

    #[test]
    fn test_validate_ai_config_azure_openai_rejects_http_base_url() {
        let msg = rejection_message(&AIConfig {
            provider: "azure-openai".to_string(),
            api_key: Some("azure-key-123".to_string()),
            model: "dep123".to_string(),
            api_version: Some("2025-04-01-preview".to_string()),
            base_url: "http://example.openai.azure.com".to_string(),
            ..Default::default()
        });
        for needle in &["ai.base_url", "HTTPS", "local", "ollama"] {
            assert!(msg.contains(*needle), "msg={msg}");
        }
    }

    #[test]
    fn test_validate_ai_config_hosted_https_base_url_accepted() {
        // (provider, api key, api version)
        let cases = [
            ("openai", "sk-test-key-12345", None),
            ("openrouter", "any-key-12345", None),
            ("azure-openai", "any-key-12345", Some("2025-04-01-preview")),
        ];
        for (provider, key, version) in cases.iter() {
            let cfg = AIConfig {
                provider: provider.to_string(),
                api_key: Some(key.to_string()),
                base_url: "https://example.com/v1".to_string(),
                api_version: version.map(String::from),
                ..Default::default()
            };
            assert!(
                validate_ai_config(&cfg).is_ok(),
                "provider={provider} should accept https base_url"
            );
        }
    }

    #[test]
    fn test_validate_ai_config_hosted_default_base_url_unaffected() {
        // Hosted providers must keep validating when the user leaves
        // base_url at its default (`https://api.openai.com/v1`).
        let cfg = AIConfig {
            provider: "openai".to_string(),
            api_key: Some("sk-test-key".to_string()),
            ..Default::default()
        };
        assert!(validate_ai_config(&cfg).is_ok());
    }

    #[test]
    fn test_validate_ai_config_local_unaffected_by_https_rule() {
        // Regression: the HTTPS-only rule must stay confined to hosted
        // providers. The API key is deliberately left at its default here.
        let endpoints = [
            "http://10.0.0.5:11434/v1",
            "https://internal.example.com/v1",
        ];
        for endpoint in endpoints.iter() {
            let cfg = AIConfig {
                provider: "local".to_string(),
                base_url: endpoint.to_string(),
                model: "llama3.1".to_string(),
                ..Default::default()
            };
            assert!(validate_ai_config(&cfg).is_ok());
        }
    }

    #[test]
    fn test_validate_ai_config_invalid_temperature() {
        let cfg = AIConfig {
            provider: "openai".to_string(),
            temperature: 3.0, // above the accepted maximum
            ..Default::default()
        };
        assert!(validate_ai_config(&cfg).is_err());
    }

    #[test]
    fn test_validate_ai_config_invalid_openai_key() {
        let cfg = AIConfig {
            provider: "openai".to_string(),
            api_key: Some("invalid-key".to_string()),
            ..Default::default()
        };
        assert!(validate_ai_config(&cfg).is_err());
    }

    #[test]
    fn test_validate_sync_config_valid() {
        assert!(validate_sync_config(&SyncConfig::default()).is_ok());
    }

    #[test]
    fn test_validate_vad_config_invalid_sensitivity() {
        let vad = VadConfig {
            sensitivity: 1.5, // outside 0.0-1.0
            ..Default::default()
        };
        assert!(vad.validate().is_err());
    }

    #[test]
    fn test_validate_config_consistency() {
        let mut config = Config::default();
        config.ai.provider = "openai".to_string();

        // A present-but-empty key is rejected.
        config.ai.api_key = Some(String::new());
        assert!(validate_config(&config).is_err());

        // A well-formed key is accepted.
        config.ai.api_key = Some("sk-valid123".to_string());
        assert!(validate_config(&config).is_ok());

        // No key at all (the default state) is accepted too.
        config.ai.api_key = None;
        assert!(validate_config(&config).is_ok());
    }
}