sakurs_core/api/
config.rs

1//! Configuration API for sentence processing
2
3use crate::api::{Error, Language};
4use std::str::FromStr;
5
/// Built-in values used when a configuration field is not set explicitly
pub mod defaults {
    /// Number of bytes in one kibibyte; keeps the sizes below readable.
    const KIB: usize = 1024;

    /// Chunk size used by default, in bytes (256KB)
    pub const CHUNK_SIZE: usize = 256 * KIB;

    /// Default parallel processing threshold, in bytes (1MB)
    pub const PARALLEL_THRESHOLD: usize = 1024 * KIB;

    /// Default overlap between chunks, in bytes
    pub const OVERLAP_SIZE: usize = 256;
}
17
18/// Processing configuration
19#[derive(Debug, Clone)]
20pub struct Config {
21    pub(crate) language: Language,
22    pub(crate) chunk_size: usize,         // in bytes
23    pub(crate) parallel_threshold: usize, // minimum size for parallel processing
24    pub(crate) threads: Option<usize>,    // None = all available threads
25    pub(crate) overlap_size: usize,       // overlap between chunks in bytes
26}
27
28impl Default for Config {
29    fn default() -> Self {
30        Self {
31            language: Language::default(),
32            chunk_size: defaults::CHUNK_SIZE,
33            parallel_threshold: defaults::PARALLEL_THRESHOLD,
34            threads: None,
35            overlap_size: defaults::OVERLAP_SIZE,
36        }
37    }
38}
39
40impl Config {
41    /// Create a configuration builder
42    pub fn builder() -> ConfigBuilder {
43        ConfigBuilder::default()
44    }
45
46    /// Create a configuration optimized for small texts
47    pub fn small_text() -> Self {
48        Self {
49            language: Language::default(),
50            chunk_size: 8 * 1024,           // 8KB chunks
51            parallel_threshold: usize::MAX, // Never use parallel
52            threads: None,
53            overlap_size: 64, // Smaller overlap
54        }
55    }
56
57    /// Create a configuration optimized for large texts
58    pub fn large_text() -> Self {
59        Self {
60            language: Language::default(),
61            chunk_size: 512 * 1024,         // 512KB chunks
62            parallel_threshold: 512 * 1024, // 512KB threshold
63            threads: None,                  // Use all available cores
64            overlap_size: 512,              // Larger overlap
65        }
66    }
67
68    /// Create a configuration optimized for streaming
69    pub fn streaming() -> Self {
70        Self {
71            language: Language::default(),
72            chunk_size: 32 * 1024,          // 32KB chunks
73            parallel_threshold: 256 * 1024, // 256KB threshold
74            threads: Some(2),               // Limited parallelism
75            overlap_size: 128,              // Moderate overlap
76        }
77    }
78
79    /// Validate the configuration
80    pub(crate) fn validate(&self) -> Result<(), Error> {
81        if self.chunk_size == 0 {
82            return Err(Error::Configuration(
83                "chunk_size must be greater than 0".into(),
84            ));
85        }
86
87        if self.overlap_size >= self.chunk_size {
88            return Err(Error::Configuration(
89                "overlap_size must be less than chunk_size".into(),
90            ));
91        }
92
93        if let Some(threads) = self.threads {
94            if threads == 0 {
95                return Err(Error::Configuration(
96                    "threads must be greater than 0".into(),
97                ));
98            }
99        }
100
101        Ok(())
102    }
103}
104
/// Chainable builder that collects optional overrides for a configuration.
///
/// Every field starts out as `None`, meaning no override was requested.
#[derive(Debug, Default)]
pub struct ConfigBuilder {
    /// Language code as supplied by the caller, not yet parsed.
    language: Option<String>,
    /// Requested chunk size, in bytes.
    chunk_size: Option<usize>,
    /// Requested parallel processing threshold, in bytes.
    parallel_threshold: Option<usize>,
    /// Requested thread count.
    threads: Option<usize>,
    /// Requested overlap between chunks, in bytes.
    overlap_size: Option<usize>,
}
114
115impl ConfigBuilder {
116    /// Create a new configuration builder
117    pub fn new() -> Self {
118        Self::default()
119    }
120
121    /// Set the language by code
122    pub fn language(mut self, code: impl Into<String>) -> Result<Self, Error> {
123        self.language = Some(code.into());
124        Ok(self)
125    }
126
127    /// Set the chunk size in bytes
128    pub fn chunk_size(mut self, bytes: usize) -> Self {
129        self.chunk_size = Some(bytes);
130        self
131    }
132
133    /// Set the number of threads (None = all available)
134    pub fn threads(mut self, count: Option<usize>) -> Self {
135        self.threads = count;
136        self
137    }
138
139    /// Set the parallel processing threshold in bytes
140    pub fn parallel_threshold(mut self, bytes: usize) -> Self {
141        self.parallel_threshold = Some(bytes);
142        self
143    }
144
145    /// Set the overlap size between chunks in bytes
146    pub fn overlap_size(mut self, bytes: usize) -> Self {
147        self.overlap_size = Some(bytes);
148        self
149    }
150
151    /// Build the configuration
152    pub fn build(self) -> Result<Config, Error> {
153        let mut config = Config::default();
154
155        if let Some(lang_code) = self.language {
156            config.language = Language::from_str(&lang_code)?;
157        }
158
159        if let Some(size) = self.chunk_size {
160            config.chunk_size = size;
161        }
162
163        if let Some(threshold) = self.parallel_threshold {
164            config.parallel_threshold = threshold;
165        }
166
167        if self.threads.is_some() {
168            config.threads = self.threads;
169        }
170
171        if let Some(overlap) = self.overlap_size {
172            config.overlap_size = overlap;
173        }
174
175        config.validate()?;
176        Ok(config)
177    }
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration mirrors the `defaults` constants and is valid.
    #[test]
    fn test_default_config() {
        let cfg = Config::default();

        assert_eq!(
            (cfg.chunk_size, cfg.parallel_threshold, cfg.overlap_size),
            (
                defaults::CHUNK_SIZE,
                defaults::PARALLEL_THRESHOLD,
                defaults::OVERLAP_SIZE
            )
        );
        assert_eq!(cfg.threads, None);
        assert!(cfg.validate().is_ok());
    }

    /// Each kind of invalid setting is rejected by `validate`.
    #[test]
    fn test_config_validation() {
        let rejected = [
            // Zero chunk size
            Config {
                chunk_size: 0,
                ..Default::default()
            },
            // Overlap larger than the chunk
            Config {
                chunk_size: 100,
                overlap_size: 200,
                ..Default::default()
            },
            // Zero threads
            Config {
                threads: Some(0),
                ..Default::default()
            },
        ];

        for cfg in rejected.iter() {
            assert!(cfg.validate().is_err(), "expected rejection of {:?}", cfg);
        }
    }

    /// Every preset carries its documented values and passes validation.
    #[test]
    fn test_preset_configs() {
        let small = Config::small_text();
        assert_eq!(
            (small.chunk_size, small.parallel_threshold, small.overlap_size),
            (8 * 1024, usize::MAX, 64)
        );
        assert!(small.validate().is_ok());

        let large = Config::large_text();
        assert_eq!(
            (large.chunk_size, large.parallel_threshold, large.overlap_size),
            (512 * 1024, 512 * 1024, 512)
        );
        assert!(large.validate().is_ok());

        let streaming = Config::streaming();
        assert_eq!(
            (
                streaming.chunk_size,
                streaming.parallel_threshold,
                streaming.threads,
                streaming.overlap_size
            ),
            (32 * 1024, 256 * 1024, Some(2), 128)
        );
        assert!(streaming.validate().is_ok());
    }

    /// All size and thread overrides set on the builder reach the built config.
    #[test]
    fn test_config_builder_with_new_fields() {
        let builder = Config::builder()
            .chunk_size(128 * 1024)
            .parallel_threshold(256 * 1024)
            .overlap_size(512)
            .threads(Some(4));
        let built = builder.build().unwrap();

        assert_eq!(
            (
                built.chunk_size,
                built.parallel_threshold,
                built.overlap_size,
                built.threads
            ),
            (128 * 1024, 256 * 1024, 512, Some(4))
        );
    }

    /// Values right at the edge of each validation rule behave as expected.
    #[test]
    fn test_config_validation_boundary_values() {
        // Smallest chunk size that is accepted
        let smallest_chunk = Config {
            chunk_size: 1,
            overlap_size: 0,
            ..Default::default()
        };
        assert!(smallest_chunk.validate().is_ok());

        // Largest accepted overlap: chunk_size - 1
        let widest_overlap = Config {
            chunk_size: 100,
            overlap_size: 99,
            ..Default::default()
        };
        assert!(widest_overlap.validate().is_ok());

        // Overlap equal to the chunk size is rejected
        let overlap_equals_chunk = Config {
            chunk_size: 100,
            overlap_size: 100,
            ..Default::default()
        };
        let outcome = overlap_equals_chunk.validate();
        assert!(outcome.is_err());
        if let Err(Error::Configuration(msg)) = outcome {
            assert!(msg.contains("overlap_size must be less than chunk_size"));
        } else {
            panic!("Expected Configuration error");
        }

        // Smallest accepted thread count
        let single_thread = Config {
            threads: Some(1),
            ..Default::default()
        };
        assert!(single_thread.validate().is_ok());
    }

    /// Invalid values supplied through the builder make `build` fail.
    #[test]
    fn test_config_builder_invalid_configurations() {
        let attempts = vec![
            // Zero chunk size
            Config::builder().chunk_size(0),
            // Overlap equal to chunk size
            Config::builder().chunk_size(100).overlap_size(100),
            // Zero threads
            Config::builder().threads(Some(0)),
        ];

        for builder in attempts {
            assert!(builder.build().is_err());
        }
    }

    /// An unknown language code is accepted by `language` but rejected by `build`.
    #[test]
    fn test_config_builder_with_invalid_language() {
        // `language` only stores the code, so it succeeds here.
        let builder = Config::builder().language("invalid_lang").unwrap();
        // The code is parsed inside `build`, which is where the error surfaces.
        let outcome = builder.build();

        assert!(outcome.is_err());
        if let Err(Error::InvalidLanguage(msg)) = outcome {
            assert!(msg.contains("invalid_lang"));
        } else {
            panic!("Expected InvalidLanguage error");
        }
    }

    /// Validation failures carry the exact expected messages.
    #[test]
    fn test_config_validation_error_messages() {
        let cases = vec![
            (
                Config {
                    chunk_size: 0,
                    ..Default::default()
                },
                "chunk_size must be greater than 0",
            ),
            (
                Config {
                    threads: Some(0),
                    ..Default::default()
                },
                "threads must be greater than 0",
            ),
        ];

        for (cfg, expected) in cases {
            if let Err(Error::Configuration(msg)) = cfg.validate() {
                assert_eq!(msg, expected);
            } else {
                panic!("Expected specific error message");
            }
        }
    }

    /// Fields that were not overridden keep their default values.
    #[test]
    fn test_config_builder_partial_configuration() {
        let built = Config::builder().chunk_size(64 * 1024).build().unwrap();

        assert_eq!(built.chunk_size, 64 * 1024);
        assert_eq!(
            (built.parallel_threshold, built.overlap_size),
            (defaults::PARALLEL_THRESHOLD, defaults::OVERLAP_SIZE)
        );
        assert_eq!(built.threads, None);
    }

    /// Known language codes resolve to the matching `Language` variant.
    #[test]
    fn test_config_builder_language_setting() {
        let expectations = vec![("en", Language::English), ("ja", Language::Japanese)];

        for (code, expected) in expectations {
            let built = Config::builder().language(code).unwrap().build().unwrap();
            assert_eq!(built.language, expected);
        }
    }

    /// Very large but consistent values are still accepted.
    #[test]
    fn test_large_configuration_values() {
        let half_of_max = usize::MAX / 2;
        let huge = Config {
            chunk_size: half_of_max,
            overlap_size: 1024,
            parallel_threshold: half_of_max,
            threads: Some(1024),
            ..Default::default()
        };

        assert!(huge.validate().is_ok());
    }
}