subx-cli 1.6.0

AI subtitle processing CLI tool, which automatically matches, renames, and converts subtitle files.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
// src/config/mod.rs
#![allow(deprecated)]
//! Configuration management module for SubX.
//!
//! This module provides the complete configuration service system with
//! dependency injection support and comprehensive type definitions.
//!
//! # Key Components
//!
//! - [`Config`] - Main configuration structure containing all settings
//! - [`ConfigService`] - Service interface for configuration management
//! - [`ProductionConfigService`] - Production implementation with file I/O
//! - [`TestConfigService`] - Test implementation with controlled behavior
//! - [`TestConfigBuilder`] - Builder pattern for test configurations
//!
//! # Validation System
//!
//! The configuration system provides a layered validation architecture:
//!
//! - [`validation`] - Low-level validation functions for individual values
//! - [`validator`] - High-level configuration section validators
//! - [`field_validator`] - Key-value validation for configuration service
//!
//! ## Architecture
//!
//! ```text
//! ConfigService
//!      ↓
//! field_validator (key-value validation)
//!      ↓
//! validation (primitive validation functions)
//!
//! validator (section validation)
//!      ↓
//! validation (primitive validation functions)
//! ```
//!
//! # Examples
//!
//! ```rust
//! use subx_cli::config::{Config, ConfigService, ProductionConfigService};
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Create a production configuration service
//! let config_service = ProductionConfigService::new()?;
//!
//! // Load configuration
//! let config = config_service.get_config()?;
//! println!("AI Provider: {}", config.ai.provider);
//! # Ok(())
//! # }
//! ```
//!
//! # Architecture
//!
//! The configuration system uses dependency injection to provide testable
//! and maintainable configuration management. All configuration access
//! should go through the [`ConfigService`] trait.

use serde::{Deserialize, Serialize};
use std::fmt;
use std::path::PathBuf;

// Configuration service system
pub mod builder;
pub mod environment;
pub mod field_validator;
pub mod masking;
pub mod service;
pub mod test_macros;
pub mod test_service;
pub mod validation;
pub mod validator;

pub use masking::mask_sensitive_value;

// ============================================================================
// Configuration Type Definitions
// ============================================================================

/// Full application configuration for SubX.
///
/// This struct aggregates all settings for AI integration, subtitle format
/// conversion, synchronization, general options, and parallel execution.
///
/// `Default` is derived, so `Config::default()` builds every section from that
/// section type's own `Default` implementation and leaves `loaded_from` as `None`.
///
/// # Examples
///
/// ```rust
/// use subx_cli::config::Config;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let config = Config::default();
/// assert_eq!(config.ai.provider, "openai");
/// assert_eq!(config.formats.default_output, "srt");
/// # Ok(())
/// # }
/// ```
///
/// # Serialization
///
/// This struct can be serialized to and deserialized from TOML for
/// configuration files. Each section becomes its own TOML table.
///
/// ```rust
/// use subx_cli::config::Config;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let config = Config::default();
/// let toml_str = toml::to_string(&config)?;
/// assert!(toml_str.contains("[ai]"));
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Serialize, Deserialize, Clone, Default)]
pub struct Config {
    /// AI service configuration parameters (see [`AIConfig`]).
    pub ai: AIConfig,
    /// Subtitle format conversion settings (see [`FormatsConfig`]).
    pub formats: FormatsConfig,
    /// Audio-subtitle synchronization options (see [`SyncConfig`]).
    pub sync: SyncConfig,
    /// General runtime options, e.g. backup enabled, job limits (see [`GeneralConfig`]).
    pub general: GeneralConfig,
    /// Parallel processing parameters (see [`ParallelConfig`]).
    pub parallel: ParallelConfig,
    /// Optional file path from which the configuration was loaded.
    ///
    /// `None` for a configuration created with `Config::default()`.
    pub loaded_from: Option<PathBuf>,
}

/// AI service configuration parameters.
///
/// This structure defines all configuration options for AI providers,
/// including authentication, model parameters, retry behavior, and timeouts.
///
/// `Debug` is deliberately not derived: the manual implementation in this
/// module prints `api_key` as `[REDACTED]` so the key cannot leak through
/// `{:?}` formatting. Note that `Serialize` still emits the key verbatim.
///
/// # Examples
///
/// Creating a default configuration:
/// ```rust
/// use subx_cli::config::AIConfig;
///
/// let ai_config = AIConfig::default();
/// assert_eq!(ai_config.provider, "openai");
/// assert_eq!(ai_config.model, "gpt-4.1-mini");
/// assert_eq!(ai_config.temperature, 0.3);
/// ```
#[derive(Serialize, Deserialize, Clone)]
pub struct AIConfig {
    /// AI provider name (e.g. "openai", "anthropic"). Defaults to `"openai"`.
    pub provider: String,
    /// API key for authentication. Defaults to `None`.
    pub api_key: Option<String>,
    /// AI model name to use. Defaults to `"gpt-4.1-mini"`.
    pub model: String,
    /// API base URL. Defaults to `"https://api.openai.com/v1"`.
    pub base_url: String,
    /// Maximum sample length per request. Defaults to `3000`.
    pub max_sample_length: usize,
    /// AI generation creativity parameter (0.0-1.0). Defaults to `0.3`.
    pub temperature: f32,
    /// Maximum tokens in response. Defaults to `10000`.
    pub max_tokens: u32,
    /// Number of retries on request failure. Defaults to `3`.
    pub retry_attempts: u32,
    /// Retry interval in milliseconds. Defaults to `1000`.
    pub retry_delay_ms: u64,
    /// HTTP request timeout in seconds. Defaults to `120`.
    ///
    /// This controls how long to wait for a response from the AI service.
    /// For slow networks or complex requests, you may need to increase this value.
    pub request_timeout_seconds: u64,

    /// Azure OpenAI API version (optional, defaults to latest).
    ///
    /// Marked `#[serde(default)]`, so the key may be omitted from serialized
    /// input, in which case the field is `None`.
    #[serde(default)]
    pub api_version: Option<String>,
}

impl fmt::Debug for AIConfig {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("AIConfig")
            .field("provider", &self.provider)
            .field("api_key", &self.api_key.as_ref().map(|_| "[REDACTED]"))
            .field("model", &self.model)
            .field("base_url", &self.base_url)
            .field("max_sample_length", &self.max_sample_length)
            .field("temperature", &self.temperature)
            .field("max_tokens", &self.max_tokens)
            .field("retry_attempts", &self.retry_attempts)
            .field("retry_delay_ms", &self.retry_delay_ms)
            .field("request_timeout_seconds", &self.request_timeout_seconds)
            .field("api_version", &self.api_version)
            .finish()
    }
}

impl Default for AIConfig {
    fn default() -> Self {
        Self {
            provider: "openai".to_string(),
            api_key: None,
            model: "gpt-4.1-mini".to_string(),
            base_url: "https://api.openai.com/v1".to_string(),
            max_sample_length: 3000,
            temperature: 0.3,
            max_tokens: 10000,
            retry_attempts: 3,
            retry_delay_ms: 1000,
            // Set to 120 seconds to handle slow networks and complex AI requests
            // This is especially important for users with high-latency connections
            request_timeout_seconds: 120,
            api_version: None,
        }
    }
}

/// Subtitle format related configuration.
///
/// Controls how subtitle files are processed, including format conversion,
/// encoding detection, and style preservation.
///
/// # Examples
///
/// ```rust
/// use subx_cli::config::FormatsConfig;
///
/// let formats = FormatsConfig::default();
/// assert_eq!(formats.default_output, "srt");
/// assert_eq!(formats.default_encoding, "utf-8");
/// assert!(!formats.preserve_styling);
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FormatsConfig {
    /// Default output format (e.g. "srt", "ass", "vtt"). Defaults to `"srt"`.
    pub default_output: String,
    /// Whether to preserve style information during format conversion.
    /// Defaults to `false`.
    pub preserve_styling: bool,
    /// Default character encoding (e.g. "utf-8", "gbk"). Defaults to `"utf-8"`.
    pub default_encoding: String,
    /// Encoding detection confidence threshold (0.0-1.0). Defaults to `0.8`.
    pub encoding_detection_confidence: f32,
}

impl Default for FormatsConfig {
    fn default() -> Self {
        Self {
            default_output: "srt".to_string(),
            preserve_styling: false,
            default_encoding: "utf-8".to_string(),
            encoding_detection_confidence: 0.8,
        }
    }
}

/// Audio synchronization configuration supporting VAD speech detection.
///
/// This configuration struct defines settings for subtitle-audio synchronization,
/// including method selection, timing constraints, and VAD-specific parameters.
///
/// # Legacy fields
///
/// The `#[deprecated]` fields below are kept only so existing code that names
/// them still compiles. They are all marked `#[serde(skip)]`, so they are
/// never written to or read from configuration files. Because the struct has
/// no container-level `#[serde(default)]`, a deserialized `SyncConfig` gets the
/// field type's default for each of them (`0.0`, `0`, `false`), which differs
/// from the values produced by `SyncConfig::default()`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SyncConfig {
    /// Default synchronization method ("vad", "auto"). Defaults to `"auto"`.
    pub default_method: String,
    /// Maximum allowed time offset in seconds. Defaults to `60.0`.
    pub max_offset_seconds: f32,
    /// Local VAD related settings (see [`VadConfig`]).
    pub vad: VadConfig,

    // Deprecated legacy fields, preserved for backward compatibility.
    // None of them is serialized or deserialized (`#[serde(skip)]`).
    /// Deprecated: correlation threshold for audio analysis
    #[deprecated]
    #[serde(skip)]
    pub correlation_threshold: f32,
    /// Deprecated: dialogue detection threshold
    #[deprecated]
    #[serde(skip)]
    pub dialogue_detection_threshold: f32,
    /// Deprecated: minimum dialogue duration in milliseconds
    #[deprecated]
    #[serde(skip)]
    pub min_dialogue_duration_ms: u32,
    /// Deprecated: dialogue merge gap in milliseconds
    #[deprecated]
    #[serde(skip)]
    pub dialogue_merge_gap_ms: u32,
    /// Deprecated: enable dialogue detection flag
    #[deprecated]
    #[serde(skip)]
    pub enable_dialogue_detection: bool,
    /// Deprecated: audio sample rate
    #[deprecated]
    #[serde(skip)]
    pub audio_sample_rate: u32,
    /// Deprecated: auto-detect sample rate flag
    #[deprecated]
    #[serde(skip)]
    pub auto_detect_sample_rate: bool,
}

/// Local Voice Activity Detection configuration.
///
/// This struct defines parameters for local VAD processing, including sensitivity,
/// audio chunking, and speech segment filtering. Adjust these fields to control
/// how strictly speech is detected and how short segments are filtered out.
///
/// # Fields
///
/// - `enabled`: Whether local VAD is enabled (default `true`)
/// - `sensitivity`: Speech detection sensitivity (0.0-1.0, default `0.25`). Lower values are stricter and less likely to classify audio as speech.
/// - `padding_chunks`: Number of non-speech chunks to include before and after detected speech (default `3`)
/// - `min_speech_duration_ms`: Minimum duration (ms) for a segment to be considered valid speech (default `300`)
///
/// # Examples
///
/// ```rust
/// use subx_cli::config::VadConfig;
///
/// let vad = VadConfig::default();
/// assert!(vad.enabled);
/// assert_eq!(vad.sensitivity, 0.25);
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct VadConfig {
    /// Whether to enable local VAD method. Defaults to `true`.
    pub enabled: bool,
    /// Speech detection sensitivity (0.0-1.0). Defaults to `0.25`.
    ///
    /// Lower values are stricter: a smaller value means the detector is less likely to classify a chunk as speech.
    /// For example, 0.25 is more strict than 0.75.
    pub sensitivity: f32,
    /// Number of non-speech chunks to pad before and after detected speech.
    /// Defaults to `3`.
    pub padding_chunks: u32,
    /// Minimum speech duration in milliseconds. Defaults to `300`.
    ///
    /// Segments shorter than this value will be discarded as noise or non-speech.
    pub min_speech_duration_ms: u32,
}

#[allow(deprecated)]
impl Default for SyncConfig {
    fn default() -> Self {
        Self {
            default_method: "auto".to_string(),
            max_offset_seconds: 60.0,
            vad: VadConfig::default(),
            correlation_threshold: 0.8,
            dialogue_detection_threshold: 0.6,
            min_dialogue_duration_ms: 500,
            dialogue_merge_gap_ms: 200,
            enable_dialogue_detection: true,
            audio_sample_rate: 44100,
            auto_detect_sample_rate: true,
        }
    }
}

impl Default for VadConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            sensitivity: 0.25, // Default changed to 0.25, the smaller the value, the stricter the detection
            padding_chunks: 3,
            min_speech_duration_ms: 300,
        }
    }
}

/// General configuration settings for the SubX CLI tool.
///
/// This struct contains general settings that control the overall behavior
/// of the application, including backup policies, processing limits, and
/// user interface preferences.
///
/// # Examples
///
/// ```rust
/// use subx_cli::config::GeneralConfig;
///
/// let config = GeneralConfig::default();
/// assert_eq!(config.max_concurrent_jobs, 4);
/// assert!(!config.backup_enabled);
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct GeneralConfig {
    /// Enable automatic backup of original files. Defaults to `false`.
    pub backup_enabled: bool,
    /// Maximum number of concurrent processing jobs. Defaults to `4`.
    pub max_concurrent_jobs: usize,
    /// Task timeout in seconds. Defaults to `300`.
    pub task_timeout_seconds: u64,
    /// Workspace directory for CLI commands (override current working directory).
    /// Defaults to `"."`, i.e. the current directory.
    pub workspace: std::path::PathBuf,
    /// Enable progress bar display. Defaults to `true`.
    pub enable_progress_bar: bool,
    /// Worker idle timeout in seconds. Defaults to `60`.
    pub worker_idle_timeout_seconds: u64,
    /// Maximum subtitle file size in bytes (default 50 MiB, i.e. `52_428_800`).
    pub max_subtitle_bytes: u64,
    /// Maximum audio file size in bytes (default 2 GiB, i.e. `2_147_483_648`).
    pub max_audio_bytes: u64,
}

impl Default for GeneralConfig {
    fn default() -> Self {
        Self {
            backup_enabled: false,
            max_concurrent_jobs: 4,
            task_timeout_seconds: 300,
            // Default workspace is current directory
            workspace: std::path::PathBuf::from("."),
            enable_progress_bar: true,
            worker_idle_timeout_seconds: 60,
            max_subtitle_bytes: 52_428_800,
            max_audio_bytes: 2_147_483_648,
        }
    }
}

/// Parallel processing configuration.
///
/// Controls how parallel processing is performed, including worker
/// management, task distribution, and overflow handling strategies.
///
/// # Examples
///
/// ```rust
/// use subx_cli::config::{ParallelConfig, OverflowStrategy};
///
/// let parallel = ParallelConfig::default();
/// assert!(parallel.max_workers > 0);
/// assert_eq!(parallel.overflow_strategy, OverflowStrategy::Block);
/// ```
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ParallelConfig {
    /// Maximum number of worker threads.
    ///
    /// Defaults to the value reported by `num_cpus::get()`.
    pub max_workers: usize,
    /// Strategy for handling task overflow when queues are full.
    ///
    /// Defaults to [`OverflowStrategy::Block`]. The available variants, as
    /// described on [`OverflowStrategy`] itself, are:
    /// - [`OverflowStrategy::Block`] - Block until a worker becomes available
    /// - [`OverflowStrategy::Drop`] - Drop new tasks when all workers are busy
    /// - [`OverflowStrategy::Expand`] - Create additional temporary workers
    /// - [`OverflowStrategy::DropOldest`] - Drop oldest tasks in queue
    /// - [`OverflowStrategy::Reject`] - Reject new tasks
    pub overflow_strategy: OverflowStrategy,
    /// Task queue size. Defaults to `1000`.
    pub task_queue_size: usize,
    /// Enable task priorities. Defaults to `false`.
    pub enable_task_priorities: bool,
    /// Auto-balance workers. Defaults to `true`.
    pub auto_balance_workers: bool,
}

impl Default for ParallelConfig {
    fn default() -> Self {
        Self {
            max_workers: num_cpus::get(),
            overflow_strategy: OverflowStrategy::Block,
            task_queue_size: 1000,
            enable_task_priorities: false,
            auto_balance_workers: true,
        }
    }
}

/// Strategy for handling overflow when all workers are busy.
///
/// This enum defines different strategies for handling situations where
/// all worker threads are occupied and new tasks arrive.
///
/// The enum has no serde renaming attributes, so each variant is serialized
/// under its Rust name (for example `"Block"`).
///
/// # Examples
///
/// ```rust
/// use subx_cli::config::OverflowStrategy;
///
/// let strategy = OverflowStrategy::Block;
/// assert_eq!(strategy, OverflowStrategy::Block);
///
/// // Variants can be collected and compared
/// let strategies = vec![
///     OverflowStrategy::Block,
///     OverflowStrategy::Drop,
///     OverflowStrategy::Expand,
/// ];
/// assert_eq!(strategies.len(), 3);
/// ```
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub enum OverflowStrategy {
    /// Block until a worker becomes available.
    ///
    /// This is the safest option as it ensures all tasks are processed,
    /// but may cause the application to become unresponsive.
    Block,
    /// Drop new tasks when all workers are busy.
    ///
    /// Use this when task loss is acceptable and responsiveness is critical.
    Drop,
    /// Create additional temporary workers.
    ///
    /// This can help with load spikes but may consume excessive resources.
    Expand,
    /// Drop oldest tasks in queue.
    ///
    /// Prioritizes recent tasks over older ones in the queue.
    DropOldest,
    /// Reject new tasks.
    ///
    /// Similar to Drop but may provide error feedback to the caller.
    Reject,
}

// ============================================================================
// Configuration Tests
// ============================================================================

#[cfg(test)]
mod config_tests {
    //! Unit tests for the configuration types: default values, API-key
    //! redaction in `Debug`, validation entry points, and TOML serialization.

    use super::*;

    /// Key material used by the redaction test; it must never appear in `Debug` output.
    const SECRET_KEY: &str = "sk-topsecret-1234567890";

    /// A configured API key is replaced by a redaction marker in `Debug` output.
    #[test]
    fn test_ai_config_debug_redacts_api_key() {
        let with_key = AIConfig {
            api_key: Some(SECRET_KEY.to_string()),
            ..AIConfig::default()
        };
        let rendered = format!("{with_key:?}");

        let leaks_key = rendered.contains(SECRET_KEY);
        assert!(
            !leaks_key,
            "Debug output must not leak API key: {rendered}"
        );

        let has_marker = rendered.contains("[REDACTED]");
        assert!(
            has_marker,
            "Debug output should mark api_key as redacted: {rendered}"
        );
    }

    /// A missing API key is still rendered as a plain `None`.
    #[test]
    fn test_ai_config_debug_none_api_key_is_none() {
        let rendered = format!("{:?}", AIConfig::default());
        assert!(rendered.contains("api_key: None"), "got: {rendered}");
    }

    /// The aggregated default configuration exposes the expected section defaults.
    #[test]
    fn test_default_config_creation() {
        let Config {
            ai,
            formats,
            general,
            ..
        } = Config::default();

        assert_eq!(ai.provider, "openai");
        assert_eq!(ai.model, "gpt-4.1-mini");
        assert_eq!(formats.default_output, "srt");
        assert!(!general.backup_enabled);
        assert_eq!(general.max_concurrent_jobs, 4);
    }

    /// The AI section defaults match the documented values.
    #[test]
    fn test_ai_config_defaults() {
        let defaults = AIConfig::default();

        assert_eq!(defaults.provider, "openai");
        assert_eq!(defaults.model, "gpt-4.1-mini");
        assert_eq!(defaults.temperature, 0.3);
        assert_eq!(defaults.max_sample_length, 3000);
        assert_eq!(defaults.max_tokens, 10000);
    }

    /// `max_tokens` can be set at construction time and reassigned afterwards.
    #[test]
    fn test_ai_config_max_tokens_configuration() {
        let mut custom = AIConfig {
            max_tokens: 5000,
            ..Default::default()
        };
        assert_eq!(custom.max_tokens, 5000);

        // A second, different value must be stored just as faithfully.
        custom.max_tokens = 20000;
        assert_eq!(custom.max_tokens, 20000);
    }

    /// The synchronization defaults select automatic mode with VAD enabled.
    #[test]
    fn test_new_sync_config_defaults() {
        let defaults = SyncConfig::default();

        assert_eq!(defaults.default_method, "auto");
        assert_eq!(defaults.max_offset_seconds, 60.0);
        assert!(defaults.vad.enabled);
    }

    /// Validation accepts the defaults and rejects an unknown method or a negative offset.
    #[test]
    fn test_sync_config_validation() {
        let mut candidate = SyncConfig::default();

        // The untouched defaults must validate.
        assert!(candidate.validate().is_ok());

        // An unknown synchronization method is rejected.
        candidate.default_method = "invalid".to_string();
        assert!(candidate.validate().is_err());

        // Start over from the defaults and break the offset limit instead.
        candidate = SyncConfig::default();
        candidate.max_offset_seconds = -1.0;
        assert!(candidate.validate().is_err());
    }

    /// Validation accepts the VAD defaults and rejects an out-of-range sensitivity.
    #[test]
    fn test_vad_config_validation() {
        let mut candidate = VadConfig::default();

        // The untouched defaults must validate.
        assert!(candidate.validate().is_ok());

        // A sensitivity above 1.0 is rejected.
        candidate.sensitivity = 1.5;
        assert!(candidate.validate().is_err());
    }

    /// The serialized TOML contains the current sync layout and none of the removed keys.
    #[test]
    fn test_config_serialization_with_new_sync() {
        let toml_str = toml::to_string(&Config::default()).unwrap();

        // The new configuration structure must be present in the output.
        for expected in ["[sync]", "[sync.vad]", "default_method"] {
            assert!(toml_str.contains(expected));
        }

        // Whisper-related fields were removed and must not be serialized.
        for removed in ["[sync.whisper]", "analysis_window_seconds"] {
            assert!(!toml_str.contains(removed));
        }
    }
}

// ============================================================================
// Public API Re-exports
// ============================================================================

// Re-export the configuration service system
pub use builder::TestConfigBuilder;
pub use environment::{EnvironmentProvider, SystemEnvironmentProvider, TestEnvironmentProvider};
pub use service::{ConfigService, ProductionConfigService};
pub use test_service::TestConfigService;

// Re-export commonly used validation functions
pub use field_validator::validate_field;
pub use validator::validate_config;