ruvector_scipix/preprocess/
pipeline.rs

1//! Complete preprocessing pipeline with builder pattern and parallel processing
2
3use super::Result;
4use crate::preprocess::{transforms, rotation, deskew, enhancement};
5use image::{DynamicImage, GrayImage};
6use rayon::prelude::*;
7use std::sync::Arc;
8
/// Progress callback type.
///
/// The callback receives a human-readable description of the step that is
/// about to run and a progress value. The pipeline in this file reports
/// values from `0.0` (start) up to `1.0` (complete). It is stored behind an
/// [`Arc`] and must be `Send + Sync`.
pub type ProgressCallback = Arc<dyn Fn(&str, f32) + Send + Sync>;
11
12/// Complete preprocessing pipeline with configurable steps
13pub struct PreprocessPipeline {
14    auto_rotate: bool,
15    auto_deskew: bool,
16    enhance_contrast: bool,
17    denoise: bool,
18    blur_sigma: f32,
19    clahe_clip_limit: f32,
20    clahe_tile_size: u32,
21    threshold: Option<u8>,
22    adaptive_threshold: bool,
23    adaptive_window_size: u32,
24    target_width: Option<u32>,
25    target_height: Option<u32>,
26    progress_callback: Option<ProgressCallback>,
27}
28
29/// Builder for preprocessing pipeline
30pub struct PreprocessPipelineBuilder {
31    auto_rotate: bool,
32    auto_deskew: bool,
33    enhance_contrast: bool,
34    denoise: bool,
35    blur_sigma: f32,
36    clahe_clip_limit: f32,
37    clahe_tile_size: u32,
38    threshold: Option<u8>,
39    adaptive_threshold: bool,
40    adaptive_window_size: u32,
41    target_width: Option<u32>,
42    target_height: Option<u32>,
43    progress_callback: Option<ProgressCallback>,
44}
45
46impl Default for PreprocessPipelineBuilder {
47    fn default() -> Self {
48        Self {
49            auto_rotate: true,
50            auto_deskew: true,
51            enhance_contrast: true,
52            denoise: true,
53            blur_sigma: 1.0,
54            clahe_clip_limit: 2.0,
55            clahe_tile_size: 8,
56            threshold: None,
57            adaptive_threshold: true,
58            adaptive_window_size: 15,
59            target_width: None,
60            target_height: None,
61            progress_callback: None,
62        }
63    }
64}
65
66impl PreprocessPipelineBuilder {
67    pub fn new() -> Self {
68        Self::default()
69    }
70
71    pub fn auto_rotate(mut self, enable: bool) -> Self {
72        self.auto_rotate = enable;
73        self
74    }
75
76    pub fn auto_deskew(mut self, enable: bool) -> Self {
77        self.auto_deskew = enable;
78        self
79    }
80
81    pub fn enhance_contrast(mut self, enable: bool) -> Self {
82        self.enhance_contrast = enable;
83        self
84    }
85
86    pub fn denoise(mut self, enable: bool) -> Self {
87        self.denoise = enable;
88        self
89    }
90
91    pub fn blur_sigma(mut self, sigma: f32) -> Self {
92        self.blur_sigma = sigma;
93        self
94    }
95
96    pub fn clahe_clip_limit(mut self, limit: f32) -> Self {
97        self.clahe_clip_limit = limit;
98        self
99    }
100
101    pub fn clahe_tile_size(mut self, size: u32) -> Self {
102        self.clahe_tile_size = size;
103        self
104    }
105
106    pub fn threshold(mut self, threshold: Option<u8>) -> Self {
107        self.threshold = threshold;
108        self
109    }
110
111    pub fn adaptive_threshold(mut self, enable: bool) -> Self {
112        self.adaptive_threshold = enable;
113        self
114    }
115
116    pub fn adaptive_window_size(mut self, size: u32) -> Self {
117        self.adaptive_window_size = size;
118        self
119    }
120
121    pub fn target_size(mut self, width: Option<u32>, height: Option<u32>) -> Self {
122        self.target_width = width;
123        self.target_height = height;
124        self
125    }
126
127    pub fn progress_callback<F>(mut self, callback: F) -> Self
128    where
129        F: Fn(&str, f32) + Send + Sync + 'static,
130    {
131        self.progress_callback = Some(Arc::new(callback));
132        self
133    }
134
135    pub fn build(self) -> PreprocessPipeline {
136        PreprocessPipeline {
137            auto_rotate: self.auto_rotate,
138            auto_deskew: self.auto_deskew,
139            enhance_contrast: self.enhance_contrast,
140            denoise: self.denoise,
141            blur_sigma: self.blur_sigma,
142            clahe_clip_limit: self.clahe_clip_limit,
143            clahe_tile_size: self.clahe_tile_size,
144            threshold: self.threshold,
145            adaptive_threshold: self.adaptive_threshold,
146            adaptive_window_size: self.adaptive_window_size,
147            target_width: self.target_width,
148            target_height: self.target_height,
149            progress_callback: self.progress_callback,
150        }
151    }
152}
153
154impl PreprocessPipeline {
155    /// Create a new pipeline builder
156    pub fn builder() -> PreprocessPipelineBuilder {
157        PreprocessPipelineBuilder::new()
158    }
159
160    /// Report progress if callback is set
161    fn report_progress(&self, step: &str, progress: f32) {
162        if let Some(callback) = &self.progress_callback {
163            callback(step, progress);
164        }
165    }
166
167    /// Process a single image through the complete pipeline
168    ///
169    /// # Pipeline steps:
170    /// 1. Convert to grayscale
171    /// 2. Detect and correct rotation (if enabled)
172    /// 3. Detect and correct skew (if enabled)
173    /// 4. Enhance contrast with CLAHE (if enabled)
174    /// 5. Denoise with Gaussian blur (if enabled)
175    /// 6. Apply thresholding (binary or adaptive)
176    /// 7. Resize to target dimensions (if specified)
177    pub fn process(&self, image: &DynamicImage) -> Result<GrayImage> {
178        self.report_progress("Starting preprocessing", 0.0);
179
180        // Step 1: Convert to grayscale
181        self.report_progress("Converting to grayscale", 0.1);
182        let mut gray = transforms::to_grayscale(image);
183
184        // Step 2: Auto-rotate
185        if self.auto_rotate {
186            self.report_progress("Detecting rotation", 0.2);
187            let angle = rotation::detect_rotation(&gray)?;
188
189            if angle.abs() > 0.5 {
190                self.report_progress("Correcting rotation", 0.25);
191                gray = rotation::rotate_image(&gray, -angle)?;
192            }
193        }
194
195        // Step 3: Auto-deskew
196        if self.auto_deskew {
197            self.report_progress("Detecting skew", 0.3);
198            let angle = deskew::detect_skew_angle(&gray)?;
199
200            if angle.abs() > 0.5 {
201                self.report_progress("Correcting skew", 0.35);
202                gray = deskew::deskew_image(&gray, angle)?;
203            }
204        }
205
206        // Step 4: Enhance contrast
207        if self.enhance_contrast {
208            self.report_progress("Enhancing contrast", 0.5);
209            gray = enhancement::clahe(
210                &gray,
211                self.clahe_clip_limit,
212                self.clahe_tile_size,
213            )?;
214        }
215
216        // Step 5: Denoise
217        if self.denoise {
218            self.report_progress("Denoising", 0.6);
219            gray = transforms::gaussian_blur(&gray, self.blur_sigma)?;
220        }
221
222        // Step 6: Thresholding
223        self.report_progress("Applying threshold", 0.7);
224        gray = if self.adaptive_threshold {
225            transforms::adaptive_threshold(&gray, self.adaptive_window_size)?
226        } else if let Some(threshold_val) = self.threshold {
227            transforms::threshold(&gray, threshold_val)
228        } else {
229            // Auto Otsu threshold
230            let threshold_val = transforms::otsu_threshold(&gray)?;
231            transforms::threshold(&gray, threshold_val)
232        };
233
234        // Step 7: Resize
235        if let (Some(width), Some(height)) = (self.target_width, self.target_height) {
236            self.report_progress("Resizing", 0.9);
237            gray = image::imageops::resize(
238                &gray,
239                width,
240                height,
241                image::imageops::FilterType::Lanczos3,
242            );
243        }
244
245        self.report_progress("Preprocessing complete", 1.0);
246        Ok(gray)
247    }
248
249    /// Process multiple images in parallel
250    ///
251    /// # Arguments
252    /// * `images` - Vector of images to process
253    ///
254    /// # Returns
255    /// Vector of preprocessed images in the same order
256    pub fn process_batch(&self, images: Vec<DynamicImage>) -> Result<Vec<GrayImage>> {
257        images
258            .into_par_iter()
259            .map(|img| self.process(&img))
260            .collect()
261    }
262
263    /// Process image and return intermediate results from each step
264    ///
265    /// Useful for debugging and visualization
266    pub fn process_with_intermediates(
267        &self,
268        image: &DynamicImage,
269    ) -> Result<Vec<(String, GrayImage)>> {
270        let mut results = Vec::new();
271
272        // Step 1: Grayscale
273        let mut gray = transforms::to_grayscale(image);
274        results.push(("01_grayscale".to_string(), gray.clone()));
275
276        // Step 2: Rotation
277        if self.auto_rotate {
278            let angle = rotation::detect_rotation(&gray)?;
279            if angle.abs() > 0.5 {
280                gray = rotation::rotate_image(&gray, -angle)?;
281                results.push(("02_rotated".to_string(), gray.clone()));
282            }
283        }
284
285        // Step 3: Deskew
286        if self.auto_deskew {
287            let angle = deskew::detect_skew_angle(&gray)?;
288            if angle.abs() > 0.5 {
289                gray = deskew::deskew_image(&gray, angle)?;
290                results.push(("03_deskewed".to_string(), gray.clone()));
291            }
292        }
293
294        // Step 4: Enhancement
295        if self.enhance_contrast {
296            gray = enhancement::clahe(&gray, self.clahe_clip_limit, self.clahe_tile_size)?;
297            results.push(("04_enhanced".to_string(), gray.clone()));
298        }
299
300        // Step 5: Denoise
301        if self.denoise {
302            gray = transforms::gaussian_blur(&gray, self.blur_sigma)?;
303            results.push(("05_denoised".to_string(), gray.clone()));
304        }
305
306        // Step 6: Threshold
307        gray = if self.adaptive_threshold {
308            transforms::adaptive_threshold(&gray, self.adaptive_window_size)?
309        } else if let Some(threshold_val) = self.threshold {
310            transforms::threshold(&gray, threshold_val)
311        } else {
312            let threshold_val = transforms::otsu_threshold(&gray)?;
313            transforms::threshold(&gray, threshold_val)
314        };
315        results.push(("06_thresholded".to_string(), gray.clone()));
316
317        // Step 7: Resize
318        if let (Some(width), Some(height)) = (self.target_width, self.target_height) {
319            gray = image::imageops::resize(&gray, width, height, image::imageops::FilterType::Lanczos3);
320            results.push(("07_resized".to_string(), gray.clone()));
321        }
322
323        Ok(results)
324    }
325}
326
#[cfg(test)]
mod tests {
    use super::*;
    use image::{Rgb, RgbImage};

    /// Builds a 100x100 RGB image holding a diagonal gray gradient.
    fn create_test_image() -> DynamicImage {
        let gradient = RgbImage::from_fn(100, 100, |x, y| {
            let level = ((x + y) / 2) as u8;
            Rgb([level, level, level])
        });
        DynamicImage::ImageRgb8(gradient)
    }

    /// Builder with rotation and deskew detection switched off, as most tests
    /// here only exercise the remaining steps.
    fn builder_without_geometry() -> PreprocessPipelineBuilder {
        PreprocessPipeline::builder()
            .auto_rotate(false)
            .auto_deskew(false)
    }

    #[test]
    fn test_pipeline_builder() {
        let PreprocessPipeline {
            auto_rotate,
            denoise,
            blur_sigma,
            ..
        } = PreprocessPipeline::builder()
            .auto_rotate(false)
            .denoise(true)
            .blur_sigma(1.5)
            .build();

        assert!(!auto_rotate);
        assert!(denoise);
        assert!((blur_sigma - 1.5).abs() < 0.001);
    }

    #[test]
    fn test_pipeline_process() {
        let pipeline = builder_without_geometry().build();

        let processed = pipeline
            .process(&create_test_image())
            .expect("processing a plain gradient should succeed");

        // Without a target size the dimensions must be untouched.
        assert_eq!(processed.dimensions(), (100, 100));
    }

    #[test]
    fn test_pipeline_with_resize() {
        let pipeline = builder_without_geometry()
            .target_size(Some(50), Some(50))
            .build();

        let processed = pipeline
            .process(&create_test_image())
            .expect("processing with a target size should succeed");

        assert_eq!(processed.dimensions(), (50, 50));
    }

    #[test]
    fn test_pipeline_batch_processing() {
        let images: Vec<DynamicImage> = (0..3).map(|_| create_test_image()).collect();
        let pipeline = builder_without_geometry().build();

        let processed = pipeline
            .process_batch(images)
            .expect("batch processing should succeed");

        assert_eq!(processed.len(), 3);
    }

    #[test]
    fn test_pipeline_intermediates() {
        let pipeline = builder_without_geometry()
            .enhance_contrast(true)
            .denoise(true)
            .build();

        let intermediates = pipeline
            .process_with_intermediates(&create_test_image())
            .expect("collecting intermediates should succeed");

        let labels: Vec<&str> = intermediates
            .iter()
            .map(|(label, _)| label.as_str())
            .collect();

        assert!(!labels.is_empty());
        assert!(labels.iter().any(|label| label.contains("grayscale")));
        assert!(labels.iter().any(|label| label.contains("thresholded")));
    }

    #[test]
    fn test_progress_callback() {
        use std::sync::{Arc, Mutex};

        let recorded = Arc::new(Mutex::new(Vec::new()));
        let sink = Arc::clone(&recorded);

        let pipeline = builder_without_geometry()
            .progress_callback(move |step, _progress| {
                sink.lock().unwrap().push(step.to_owned());
            })
            .build();

        // The outcome is deliberately ignored; only the reported steps matter.
        let _ = pipeline.process(&create_test_image());

        let steps = recorded.lock().unwrap();
        assert!(!steps.is_empty());
        assert!(steps.iter().any(|step| step.contains("Starting")));
        assert!(steps.iter().any(|step| step.contains("complete")));
    }
}