1use crate::api::{Error, Language};
4use std::str::FromStr;
5
/// Default values that `Config::default` copies into a fresh configuration.
pub mod defaults {
    /// Bytes per KiB; keeps the size constants below readable.
    const KIB: usize = 1024;

    /// Default chunk size in bytes (256 KiB).
    pub const CHUNK_SIZE: usize = 256 * KIB;

    /// Default parallel threshold in bytes (1 MiB).
    pub const PARALLEL_THRESHOLD: usize = KIB * KIB;

    /// Default overlap size in bytes.
    pub const OVERLAP_SIZE: usize = 256;
}
17
18#[derive(Debug, Clone)]
20pub struct Config {
21 pub(crate) language: Language,
22 pub(crate) chunk_size: usize, pub(crate) parallel_threshold: usize, pub(crate) threads: Option<usize>, pub(crate) overlap_size: usize, }
27
28impl Default for Config {
29 fn default() -> Self {
30 Self {
31 language: Language::default(),
32 chunk_size: defaults::CHUNK_SIZE,
33 parallel_threshold: defaults::PARALLEL_THRESHOLD,
34 threads: None,
35 overlap_size: defaults::OVERLAP_SIZE,
36 }
37 }
38}
39
40impl Config {
41 pub fn builder() -> ConfigBuilder {
43 ConfigBuilder::default()
44 }
45
46 pub fn small_text() -> Self {
48 Self {
49 language: Language::default(),
50 chunk_size: 8 * 1024, parallel_threshold: usize::MAX, threads: None,
53 overlap_size: 64, }
55 }
56
57 pub fn large_text() -> Self {
59 Self {
60 language: Language::default(),
61 chunk_size: 512 * 1024, parallel_threshold: 512 * 1024, threads: None, overlap_size: 512, }
66 }
67
68 pub fn streaming() -> Self {
70 Self {
71 language: Language::default(),
72 chunk_size: 32 * 1024, parallel_threshold: 256 * 1024, threads: Some(2), overlap_size: 128, }
77 }
78
79 pub(crate) fn validate(&self) -> Result<(), Error> {
81 if self.chunk_size == 0 {
82 return Err(Error::Configuration(
83 "chunk_size must be greater than 0".into(),
84 ));
85 }
86
87 if self.overlap_size >= self.chunk_size {
88 return Err(Error::Configuration(
89 "overlap_size must be less than chunk_size".into(),
90 ));
91 }
92
93 if let Some(threads) = self.threads {
94 if threads == 0 {
95 return Err(Error::Configuration(
96 "threads must be greater than 0".into(),
97 ));
98 }
99 }
100
101 Ok(())
102 }
103}
104
/// Accumulates optional overrides that `build` applies on top of
/// `Config::default()`.
///
/// Every field starts out as `None`, meaning "keep the default".
#[derive(Default, Debug)]
pub struct ConfigBuilder {
    /// Language code exactly as supplied by the caller; it is parsed only
    /// when the configuration is built.
    language: Option<String>,
    /// Override for the chunk size, in bytes.
    chunk_size: Option<usize>,
    /// Override for the parallel threshold, in bytes.
    parallel_threshold: Option<usize>,
    /// Override for the thread count.
    threads: Option<usize>,
    /// Override for the overlap size, in bytes.
    overlap_size: Option<usize>,
}
114
115impl ConfigBuilder {
116 pub fn new() -> Self {
118 Self::default()
119 }
120
121 pub fn language(mut self, code: impl Into<String>) -> Result<Self, Error> {
123 self.language = Some(code.into());
124 Ok(self)
125 }
126
127 pub fn chunk_size(mut self, bytes: usize) -> Self {
129 self.chunk_size = Some(bytes);
130 self
131 }
132
133 pub fn threads(mut self, count: Option<usize>) -> Self {
135 self.threads = count;
136 self
137 }
138
139 pub fn parallel_threshold(mut self, bytes: usize) -> Self {
141 self.parallel_threshold = Some(bytes);
142 self
143 }
144
145 pub fn overlap_size(mut self, bytes: usize) -> Self {
147 self.overlap_size = Some(bytes);
148 self
149 }
150
151 pub fn build(self) -> Result<Config, Error> {
153 let mut config = Config::default();
154
155 if let Some(lang_code) = self.language {
156 config.language = Language::from_str(&lang_code)?;
157 }
158
159 if let Some(size) = self.chunk_size {
160 config.chunk_size = size;
161 }
162
163 if let Some(threshold) = self.parallel_threshold {
164 config.parallel_threshold = threshold;
165 }
166
167 if self.threads.is_some() {
168 config.threads = self.threads;
169 }
170
171 if let Some(overlap) = self.overlap_size {
172 config.overlap_size = overlap;
173 }
174
175 config.validate()?;
176 Ok(config)
177 }
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration mirrors the `defaults` constants and is valid.
    #[test]
    fn test_default_config() {
        let cfg = Config::default();

        assert_eq!(cfg.chunk_size, defaults::CHUNK_SIZE);
        assert_eq!(cfg.parallel_threshold, defaults::PARALLEL_THRESHOLD);
        assert_eq!(cfg.overlap_size, defaults::OVERLAP_SIZE);
        assert_eq!(cfg.threads, None);
        assert!(cfg.validate().is_ok());
    }

    /// Each validation rule rejects an obviously bad value.
    #[test]
    fn test_config_validation() {
        let invalid = [
            // Zero-sized chunks.
            Config {
                chunk_size: 0,
                ..Config::default()
            },
            // Overlap larger than the chunk.
            Config {
                chunk_size: 100,
                overlap_size: 200,
                ..Config::default()
            },
            // Zero threads.
            Config {
                threads: Some(0),
                ..Config::default()
            },
        ];

        for cfg in &invalid {
            assert!(cfg.validate().is_err());
        }
    }

    /// The presets carry their documented values and validate cleanly.
    #[test]
    fn test_preset_configs() {
        let preset = Config::small_text();
        assert_eq!(preset.chunk_size, 8 * 1024);
        assert_eq!(preset.parallel_threshold, usize::MAX);
        assert_eq!(preset.overlap_size, 64);
        assert!(preset.validate().is_ok());

        let preset = Config::large_text();
        assert_eq!(preset.chunk_size, 512 * 1024);
        assert_eq!(preset.parallel_threshold, 512 * 1024);
        assert_eq!(preset.overlap_size, 512);
        assert!(preset.validate().is_ok());

        let preset = Config::streaming();
        assert_eq!(preset.chunk_size, 32 * 1024);
        assert_eq!(preset.parallel_threshold, 256 * 1024);
        assert_eq!(preset.threads, Some(2));
        assert_eq!(preset.overlap_size, 128);
        assert!(preset.validate().is_ok());
    }

    /// Every numeric builder setter ends up in the built configuration.
    #[test]
    fn test_config_builder_with_new_fields() {
        let built = Config::builder()
            .chunk_size(128 * 1024)
            .parallel_threshold(256 * 1024)
            .overlap_size(512)
            .threads(Some(4))
            .build()
            .unwrap();

        assert_eq!(built.chunk_size, 128 * 1024);
        assert_eq!(built.parallel_threshold, 256 * 1024);
        assert_eq!(built.overlap_size, 512);
        assert_eq!(built.threads, Some(4));
    }

    /// Values sitting exactly on a validation boundary.
    #[test]
    fn test_config_validation_boundary_values() {
        // Smallest legal chunk with no overlap.
        let smallest = Config {
            chunk_size: 1,
            overlap_size: 0,
            ..Config::default()
        };
        assert!(smallest.validate().is_ok());

        // Overlap one byte below the chunk size is still accepted.
        let just_below = Config {
            chunk_size: 100,
            overlap_size: 99,
            ..Config::default()
        };
        assert!(just_below.validate().is_ok());

        // Overlap equal to the chunk size is rejected.
        let equal = Config {
            chunk_size: 100,
            overlap_size: 100,
            ..Config::default()
        };
        let outcome = equal.validate();
        assert!(outcome.is_err());
        if let Err(Error::Configuration(msg)) = outcome {
            assert!(msg.contains("overlap_size must be less than chunk_size"));
        } else {
            panic!("Expected Configuration error");
        }

        // A single thread is the smallest accepted thread count.
        let single_thread = Config {
            threads: Some(1),
            ..Config::default()
        };
        assert!(single_thread.validate().is_ok());
    }

    /// `build` runs validation and surfaces its failures.
    #[test]
    fn test_config_builder_invalid_configurations() {
        let zero_chunk = Config::builder().chunk_size(0).build();
        assert!(zero_chunk.is_err());

        let overlap_equals_chunk = Config::builder().chunk_size(100).overlap_size(100).build();
        assert!(overlap_equals_chunk.is_err());

        let zero_threads = Config::builder().threads(Some(0)).build();
        assert!(zero_threads.is_err());
    }

    /// An unknown language code is accepted by the setter but rejected by `build`.
    #[test]
    fn test_config_builder_with_invalid_language() {
        let builder = Config::builder().language("invalid_lang").unwrap();
        let outcome = builder.build();

        assert!(outcome.is_err());
        if let Err(Error::InvalidLanguage(msg)) = outcome {
            assert!(msg.contains("invalid_lang"));
        } else {
            panic!("Expected InvalidLanguage error");
        }
    }

    /// Validation failures carry the exact expected messages.
    #[test]
    fn test_config_validation_error_messages() {
        let zero_chunk = Config {
            chunk_size: 0,
            ..Config::default()
        };
        if let Err(Error::Configuration(msg)) = zero_chunk.validate() {
            assert_eq!(msg, "chunk_size must be greater than 0");
        } else {
            panic!("Expected specific error message");
        }

        let zero_threads = Config {
            threads: Some(0),
            ..Config::default()
        };
        if let Err(Error::Configuration(msg)) = zero_threads.validate() {
            assert_eq!(msg, "threads must be greater than 0");
        } else {
            panic!("Expected specific error message");
        }
    }

    /// Options that were not set on the builder keep their defaults.
    #[test]
    fn test_config_builder_partial_configuration() {
        let built = Config::builder().chunk_size(64 * 1024).build().unwrap();

        assert_eq!(built.chunk_size, 64 * 1024);
        assert_eq!(built.parallel_threshold, defaults::PARALLEL_THRESHOLD);
        assert_eq!(built.overlap_size, defaults::OVERLAP_SIZE);
        assert_eq!(built.threads, None);
    }

    /// Known language codes resolve to the matching `Language` variant.
    #[test]
    fn test_config_builder_language_setting() {
        let english = Config::builder().language("en").unwrap().build().unwrap();
        assert_eq!(english.language, Language::English);

        let japanese = Config::builder().language("ja").unwrap().build().unwrap();
        assert_eq!(japanese.language, Language::Japanese);
    }

    /// Very large values are accepted as long as the rules hold.
    #[test]
    fn test_large_configuration_values() {
        let half_max = usize::MAX / 2;
        let huge = Config {
            chunk_size: half_max,
            overlap_size: 1024,
            parallel_threshold: half_max,
            threads: Some(1024),
            ..Config::default()
        };

        assert!(huge.validate().is_ok());
    }
}