sakurs_core/application/
config.rs

//! Configuration and error handling for the application layer
//!
//! This module provides configuration options for performance tuning,
//! comprehensive error types for robust error handling, and the metrics
//! and thread-pool settings used while processing.
5
6use thiserror::Error;
7
/// Configuration options for text processing.
///
/// Obtain a value from [`Default::default`], from one of the preset
/// constructors, or from [`ProcessorConfig::builder`]. Values assembled by
/// hand should be checked with [`ProcessorConfig::validate`].
///
/// The type compares by value, so two configurations are equal exactly when
/// all four fields are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProcessorConfig {
    /// Target size for each chunk in bytes.
    ///
    /// Validation rejects a value of zero.
    pub chunk_size: usize,

    /// Minimum text size to trigger parallel processing.
    pub parallel_threshold: usize,

    /// Maximum number of threads to use (`None` = use all available).
    ///
    /// Validation rejects `Some(0)`.
    pub max_threads: Option<usize>,

    /// Size of overlap between chunks for cross-boundary detection.
    ///
    /// Validation compares this value directly with `chunk_size` and requires
    /// it to be strictly smaller.
    pub overlap_size: usize,
}
23
24impl Default for ProcessorConfig {
25    fn default() -> Self {
26        Self {
27            chunk_size: 256 * 1024,          // 256KB chunks
28            parallel_threshold: 1024 * 1024, // 1MB threshold for parallel
29            max_threads: None,               // Use all available cores
30            overlap_size: 256,               // 256 char overlap
31        }
32    }
33}
34
35impl ProcessorConfig {
36    /// Creates a new builder for ProcessorConfig
37    pub fn builder() -> ProcessorConfigBuilder {
38        ProcessorConfigBuilder::new()
39    }
40
41    /// Creates a configuration optimized for small texts
42    pub fn small_text() -> Self {
43        Self {
44            chunk_size: 8 * 1024,           // 8KB chunks
45            parallel_threshold: usize::MAX, // Never use parallel
46            overlap_size: 64,               // Smaller overlap
47            ..Default::default()
48        }
49    }
50
51    /// Creates a configuration optimized for large texts
52    pub fn large_text() -> Self {
53        Self {
54            chunk_size: 512 * 1024,         // 512KB chunks
55            parallel_threshold: 512 * 1024, // 512KB threshold
56            max_threads: None,              // Use all available cores
57            overlap_size: 512,              // Larger overlap
58        }
59    }
60
61    /// Creates a configuration optimized for streaming
62    pub fn streaming() -> Self {
63        Self {
64            chunk_size: 32 * 1024,          // 32KB chunks
65            parallel_threshold: 256 * 1024, // 256KB threshold
66            max_threads: Some(2),           // Limited parallelism
67            overlap_size: 128,              // Moderate overlap
68        }
69    }
70
71    /// Validates the configuration
72    pub fn validate(&self) -> Result<(), ProcessingError> {
73        if self.chunk_size == 0 {
74            return Err(ProcessingError::InvalidConfig {
75                reason: "Chunk size must be greater than 0".to_string(),
76            });
77        }
78
79        if self.overlap_size >= self.chunk_size {
80            return Err(ProcessingError::InvalidConfig {
81                reason: "Overlap size must be less than chunk size".to_string(),
82            });
83        }
84
85        if let Some(threads) = self.max_threads {
86            if threads == 0 {
87                return Err(ProcessingError::InvalidConfig {
88                    reason: "Max threads must be greater than 0".to_string(),
89                });
90            }
91        }
92
93        Ok(())
94    }
95}
96
97/// Errors that can occur during text processing
98#[derive(Debug, Error)]
99pub enum ProcessingError {
100    /// Text exceeds maximum size limit
101    #[error("Text too large for processing: {size} bytes (max: {max} bytes)")]
102    TextTooLarge { size: usize, max: usize },
103
104    /// Invalid configuration parameters
105    #[error("Invalid configuration: {reason}")]
106    InvalidConfig { reason: String },
107
108    /// Error during parallel processing
109    #[error("Parallel processing failed")]
110    ParallelError {
111        #[source]
112        source: Box<dyn std::error::Error + Send + Sync>,
113    },
114
115    /// UTF-8 encoding error
116    #[error("Invalid UTF-8 in text at position {position}")]
117    Utf8Error { position: usize },
118
119    /// Chunk boundary calculation error
120    #[error("Failed to calculate chunk boundaries: {reason}")]
121    ChunkingError { reason: String },
122
123    /// UTF-8 boundary detection failed
124    #[error("Failed to find UTF-8 boundary at position {position}")]
125    Utf8BoundaryError { position: usize },
126
127    /// Word boundary detection failed
128    #[error("Failed to find word boundary near position {position}")]
129    WordBoundaryError { position: usize },
130
131    /// Invalid chunk configuration
132    #[error("Invalid chunk boundaries: start={start}, end={end}, next={next}")]
133    InvalidChunkBoundaries {
134        start: usize,
135        end: usize,
136        next: usize,
137    },
138
139    /// Memory allocation failure
140    #[error("Memory allocation failed: {reason}")]
141    AllocationError { reason: String },
142
143    /// I/O error (for future file operations)
144    #[error("I/O operation failed")]
145    IoError {
146        #[from]
147        source: std::io::Error,
148    },
149
150    /// Language rules error
151    #[error("Language rules processing failed: {reason}")]
152    LanguageRulesError { reason: String },
153
154    /// Other errors
155    #[error("Other error: {0}")]
156    Other(String),
157}
158
159/// Result type for processing operations
160pub type ProcessingResult<T> = Result<T, ProcessingError>;
161
162/// Builder for ProcessorConfig with fluent API
163#[derive(Debug, Clone)]
164pub struct ProcessorConfigBuilder {
165    config: ProcessorConfig,
166}
167
168impl ProcessorConfigBuilder {
169    /// Creates a new builder with default values
170    pub fn new() -> Self {
171        Self {
172            config: ProcessorConfig::default(),
173        }
174    }
175
176    /// Sets the chunk size in bytes
177    pub fn chunk_size(mut self, size: usize) -> Self {
178        self.config.chunk_size = size;
179        self
180    }
181
182    /// Sets the parallel processing threshold
183    pub fn parallel_threshold(mut self, threshold: usize) -> Self {
184        self.config.parallel_threshold = threshold;
185        self
186    }
187
188    /// Sets the maximum number of threads
189    pub fn max_threads(mut self, threads: Option<usize>) -> Self {
190        self.config.max_threads = threads;
191        self
192    }
193
194    /// Sets the overlap size between chunks
195    pub fn overlap_size(mut self, size: usize) -> Self {
196        self.config.overlap_size = size;
197        self
198    }
199
200    /// Builds the configuration, validating parameters
201    pub fn build(self) -> ProcessingResult<ProcessorConfig> {
202        self.config.validate()?;
203        Ok(self.config)
204    }
205
206    /// Builds the configuration without validation (for testing)
207    pub fn build_unchecked(self) -> ProcessorConfig {
208        self.config
209    }
210}
211
212impl Default for ProcessorConfigBuilder {
213    fn default() -> Self {
214        Self::new()
215    }
216}
217
/// Performance metrics collected during processing.
///
/// All fields default to zero. The type compares by value, so two snapshots
/// are equal exactly when every counter and timing matches.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ProcessingMetrics {
    /// Total processing time in microseconds.
    pub total_time_us: u64,

    /// Time spent in chunking (microseconds, per the `_us` suffix).
    pub chunking_time_us: u64,

    /// Time spent in parallel processing (microseconds, per the `_us` suffix).
    pub parallel_time_us: u64,

    /// Time spent in result merging (microseconds, per the `_us` suffix).
    pub merge_time_us: u64,

    /// Number of chunks processed.
    pub chunk_count: usize,

    /// Number of threads used.
    pub thread_count: usize,

    /// Total bytes processed.
    pub bytes_processed: usize,

    /// Number of boundaries detected.
    pub boundaries_found: usize,
}
245
246impl ProcessingMetrics {
247    /// Calculates throughput in MB/s
248    pub fn throughput_mbps(&self) -> f64 {
249        if self.total_time_us == 0 {
250            return 0.0;
251        }
252
253        let mb = self.bytes_processed as f64 / (1024.0 * 1024.0);
254        let seconds = self.total_time_us as f64 / 1_000_000.0;
255        mb / seconds
256    }
257
258    /// Calculates parallel efficiency (0.0 to 1.0)
259    pub fn parallel_efficiency(&self) -> f64 {
260        if self.thread_count <= 1 || self.parallel_time_us == 0 {
261            return 1.0;
262        }
263
264        // Ideal parallel time would be total_time / thread_count
265        let ideal_time = self.total_time_us as f64 / self.thread_count as f64;
266        let actual_time = self.parallel_time_us as f64;
267
268        (ideal_time / actual_time).min(1.0)
269    }
270}
271
/// Settings for the worker thread pool.
///
/// Only compiled when the `parallel` feature is enabled.
#[cfg(feature = "parallel")]
#[derive(Debug, Clone)]
pub struct ThreadPoolConfig {
    /// How many worker threads to run.
    pub num_threads: usize,

    /// Stack size for each worker thread, in bytes.
    ///
    /// NOTE(review): `None` presumably keeps the platform default; confirm
    /// against the code that builds the pool.
    pub stack_size: Option<usize>,

    /// Prefix used when naming worker threads.
    pub thread_name_prefix: String,
}
285
#[cfg(feature = "parallel")]
impl Default for ThreadPoolConfig {
    /// Returns a pool configuration sized by `num_cpus::get()`, with no
    /// explicit stack size and the `sakurs-worker` thread-name prefix.
    fn default() -> Self {
        let num_threads = num_cpus::get();
        let thread_name_prefix = String::from("sakurs-worker");

        Self {
            num_threads,
            stack_size: None,
            thread_name_prefix,
        }
    }
}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing floating-point results.
    const TOLERANCE: f64 = 1e-9;

    /// Returns `true` when `actual` is within [`TOLERANCE`] of `expected`.
    ///
    /// Used instead of `assert_eq!` on floats so the tests do not depend on
    /// the exact rounding of the arithmetic under test.
    fn approx_eq(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    #[test]
    fn test_default_config() {
        let config = ProcessorConfig::default();
        assert_eq!(config.chunk_size, 256 * 1024);
        assert_eq!(config.parallel_threshold, 1024 * 1024);
        assert_eq!(config.max_threads, None);
        assert_eq!(config.overlap_size, 256);
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_config_validation() {
        // Invalid chunk size
        let config = ProcessorConfig {
            chunk_size: 0,
            ..Default::default()
        };
        assert!(config.validate().is_err());

        // Invalid overlap size
        let config = ProcessorConfig {
            chunk_size: 1024,
            overlap_size: 2048,
            ..Default::default()
        };
        assert!(config.validate().is_err());

        // Overlap equal to the chunk size is rejected as well
        let config = ProcessorConfig {
            chunk_size: 1024,
            overlap_size: 1024,
            ..Default::default()
        };
        assert!(config.validate().is_err());

        // Largest overlap that is still accepted
        let config = ProcessorConfig {
            chunk_size: 1024,
            overlap_size: 1023,
            ..Default::default()
        };
        assert!(config.validate().is_ok());

        // Invalid thread count
        let config = ProcessorConfig {
            max_threads: Some(0),
            ..Default::default()
        };
        assert!(config.validate().is_err());
    }

    #[test]
    fn test_preset_configs() {
        let small = ProcessorConfig::small_text();
        assert_eq!(small.chunk_size, 8 * 1024);
        assert_eq!(small.parallel_threshold, usize::MAX);
        assert!(small.validate().is_ok());

        let large = ProcessorConfig::large_text();
        assert_eq!(large.chunk_size, 512 * 1024);
        assert!(large.validate().is_ok());

        let streaming = ProcessorConfig::streaming();
        assert_eq!(streaming.max_threads, Some(2));
        assert!(streaming.validate().is_ok());
    }

    #[test]
    fn test_builder() {
        // Every setter is reflected in the built configuration.
        let config = ProcessorConfig::builder()
            .chunk_size(4096)
            .parallel_threshold(8192)
            .max_threads(Some(4))
            .overlap_size(128)
            .build()
            .expect("builder values are valid");
        assert_eq!(config.chunk_size, 4096);
        assert_eq!(config.parallel_threshold, 8192);
        assert_eq!(config.max_threads, Some(4));
        assert_eq!(config.overlap_size, 128);

        // `build` runs validation and surfaces its message.
        let err = ProcessorConfig::builder()
            .chunk_size(0)
            .build()
            .unwrap_err();
        assert!(err.to_string().contains("Chunk size"));

        // `build_unchecked` hands back the values without validating them.
        let unchecked = ProcessorConfig::builder()
            .chunk_size(0)
            .build_unchecked();
        assert_eq!(unchecked.chunk_size, 0);
        assert!(unchecked.validate().is_err());
    }

    #[test]
    fn test_processing_metrics() {
        let metrics = ProcessingMetrics {
            bytes_processed: 10 * 1024 * 1024, // 10MB
            total_time_us: 1_000_000,          // 1 second
            ..Default::default()
        };

        assert!(approx_eq(metrics.throughput_mbps(), 10.0));

        let metrics_parallel = ProcessingMetrics {
            bytes_processed: 10 * 1024 * 1024,
            total_time_us: 1_000_000,
            thread_count: 4,
            parallel_time_us: 300_000, // 0.3 seconds
            ..Default::default()
        };
        assert!(metrics_parallel.parallel_efficiency() > 0.8);
    }

    #[test]
    fn test_processing_metrics_edge_cases() {
        // No recorded time: throughput falls back to zero.
        let idle = ProcessingMetrics {
            bytes_processed: 4096,
            ..Default::default()
        };
        assert!(approx_eq(idle.throughput_mbps(), 0.0));

        // A single thread is always reported as fully efficient.
        let single = ProcessingMetrics {
            total_time_us: 1_000_000,
            thread_count: 1,
            parallel_time_us: 900_000,
            ..Default::default()
        };
        assert!(approx_eq(single.parallel_efficiency(), 1.0));

        // Efficiency never exceeds 1.0, even when the ratio would.
        let capped = ProcessingMetrics {
            total_time_us: 1_000_000,
            thread_count: 2,
            parallel_time_us: 100_000,
            ..Default::default()
        };
        assert!(approx_eq(capped.parallel_efficiency(), 1.0));
    }
}