ruvector_scipix/preprocess/
mod.rs

1//! Image preprocessing module for OCR pipeline
2//!
3//! This module provides comprehensive image preprocessing capabilities including:
4//! - Image transformations (grayscale, blur, sharpen, threshold)
5//! - Rotation detection and correction
6//! - Skew correction (deskewing)
7//! - Image enhancement (CLAHE, normalization)
8//! - Text region segmentation
9//! - Complete preprocessing pipeline with parallel processing
10
11pub mod pipeline;
12pub mod transforms;
13pub mod rotation;
14pub mod deskew;
15pub mod enhancement;
16pub mod segmentation;
17
18use image::{DynamicImage, GrayImage};
19use serde::{Deserialize, Serialize};
20use thiserror::Error;
21
22/// Preprocessing error types
23#[derive(Error, Debug)]
24pub enum PreprocessError {
25    #[error("Image loading error: {0}")]
26    ImageLoad(String),
27
28    #[error("Invalid parameters: {0}")]
29    InvalidParameters(String),
30
31    #[error("Processing error: {0}")]
32    Processing(String),
33
34    #[error("Segmentation error: {0}")]
35    Segmentation(String),
36}
37
38/// Result type for preprocessing operations
39pub type Result<T> = std::result::Result<T, PreprocessError>;
40
41/// Preprocessing options for configuring the pipeline
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct PreprocessOptions {
44    /// Enable rotation detection and correction
45    pub auto_rotate: bool,
46
47    /// Enable skew detection and correction
48    pub auto_deskew: bool,
49
50    /// Enable contrast enhancement
51    pub enhance_contrast: bool,
52
53    /// Enable denoising
54    pub denoise: bool,
55
56    /// Binarization threshold (None for auto Otsu)
57    pub threshold: Option<u8>,
58
59    /// Enable adaptive thresholding
60    pub adaptive_threshold: bool,
61
62    /// Adaptive threshold window size
63    pub adaptive_window_size: u32,
64
65    /// Target image width (None to keep original)
66    pub target_width: Option<u32>,
67
68    /// Target image height (None to keep original)
69    pub target_height: Option<u32>,
70
71    /// Enable text region detection
72    pub detect_regions: bool,
73
74    /// Gaussian blur sigma for denoising
75    pub blur_sigma: f32,
76
77    /// CLAHE clip limit for contrast enhancement
78    pub clahe_clip_limit: f32,
79
80    /// CLAHE tile size
81    pub clahe_tile_size: u32,
82}
83
84impl Default for PreprocessOptions {
85    fn default() -> Self {
86        Self {
87            auto_rotate: true,
88            auto_deskew: true,
89            enhance_contrast: true,
90            denoise: true,
91            threshold: None,
92            adaptive_threshold: true,
93            adaptive_window_size: 15,
94            target_width: None,
95            target_height: None,
96            detect_regions: true,
97            blur_sigma: 1.0,
98            clahe_clip_limit: 2.0,
99            clahe_tile_size: 8,
100        }
101    }
102}
103
104/// Type of text region
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
106pub enum RegionType {
107    /// Regular text
108    Text,
109    /// Mathematical equation
110    Math,
111    /// Table
112    Table,
113    /// Figure/Image
114    Figure,
115    /// Unknown/Other
116    Unknown,
117}
118
119/// Detected text region with bounding box
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct TextRegion {
122    /// Region type
123    pub region_type: RegionType,
124
125    /// Bounding box (x, y, width, height)
126    pub bbox: (u32, u32, u32, u32),
127
128    /// Confidence score (0.0 to 1.0)
129    pub confidence: f32,
130
131    /// Average text height in pixels
132    pub text_height: f32,
133
134    /// Detected baseline angle in degrees
135    pub baseline_angle: f32,
136}
137
138/// Main preprocessing function with configurable options
139///
140/// # Arguments
141/// * `image` - Input image to preprocess
142/// * `options` - Preprocessing configuration options
143///
144/// # Returns
145/// Preprocessed grayscale image ready for OCR
146///
147/// # Example
148/// ```no_run
149/// use image::open;
150/// use ruvector_scipix::preprocess::{preprocess, PreprocessOptions};
151///
152/// let img = open("document.jpg").unwrap();
153/// let options = PreprocessOptions::default();
154/// let processed = preprocess(&img, &options).unwrap();
155/// ```
156pub fn preprocess(image: &DynamicImage, options: &PreprocessOptions) -> Result<GrayImage> {
157    pipeline::PreprocessPipeline::builder()
158        .auto_rotate(options.auto_rotate)
159        .auto_deskew(options.auto_deskew)
160        .enhance_contrast(options.enhance_contrast)
161        .denoise(options.denoise)
162        .blur_sigma(options.blur_sigma)
163        .clahe_clip_limit(options.clahe_clip_limit)
164        .clahe_tile_size(options.clahe_tile_size)
165        .threshold(options.threshold)
166        .adaptive_threshold(options.adaptive_threshold)
167        .adaptive_window_size(options.adaptive_window_size)
168        .target_size(options.target_width, options.target_height)
169        .build()
170        .process(image)
171}
172
173/// Detect text regions in an image
174///
175/// # Arguments
176/// * `image` - Input grayscale image
177/// * `min_region_size` - Minimum region size in pixels
178///
179/// # Returns
180/// Vector of detected text regions with metadata
181///
182/// # Example
183/// ```no_run
184/// use image::open;
185/// use ruvector_scipix::preprocess::detect_text_regions;
186///
187/// let img = open("document.jpg").unwrap().to_luma8();
188/// let regions = detect_text_regions(&img, 100).unwrap();
189/// println!("Found {} text regions", regions.len());
190/// ```
191pub fn detect_text_regions(
192    image: &GrayImage,
193    min_region_size: u32,
194) -> Result<Vec<TextRegion>> {
195    segmentation::find_text_regions(image, min_region_size)
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use image::{Rgb, RgbImage};

    /// Builds a `width` x `height` RGB image whose grey level at `(x, y)` is
    /// `(x + y) % 256`, giving a simple diagonal gradient to process.
    fn create_test_image(width: u32, height: u32) -> DynamicImage {
        let gradient = RgbImage::from_fn(width, height, |x, y| {
            // Reduced modulo 256 before the cast, so narrowing to `u8` is lossless.
            let val = ((x + y) % 256) as u8;
            Rgb([val, val, val])
        });

        DynamicImage::ImageRgb8(gradient)
    }

    /// Default options must succeed and keep the input dimensions.
    #[test]
    fn test_preprocess_default_options() {
        let img = create_test_image(100, 100);
        let options = PreprocessOptions::default();

        // `expect` surfaces the underlying error, which a bare
        // `assert!(result.is_ok())` would hide.
        let processed =
            preprocess(&img, &options).expect("preprocessing with default options failed");

        assert_eq!(processed.width(), 100);
        assert_eq!(processed.height(), 100);
    }

    /// Explicit target dimensions must be reflected in the output size.
    #[test]
    fn test_preprocess_with_resize() {
        let img = create_test_image(200, 200);
        let options = PreprocessOptions {
            target_width: Some(100),
            target_height: Some(100),
            ..Default::default()
        };

        let processed =
            preprocess(&img, &options).expect("preprocessing with a target size failed");

        assert_eq!(processed.width(), 100);
        assert_eq!(processed.height(), 100);
    }

    /// Struct-update syntax overrides the named fields and keeps the defaults
    /// for everything else.
    #[test]
    fn test_preprocess_options_builder() {
        let options = PreprocessOptions {
            auto_rotate: false,
            auto_deskew: false,
            enhance_contrast: true,
            denoise: true,
            threshold: Some(128),
            adaptive_threshold: false,
            ..Default::default()
        };

        assert!(!options.auto_rotate);
        assert!(!options.auto_deskew);
        assert!(options.enhance_contrast);
        assert!(options.denoise);
        assert_eq!(options.threshold, Some(128));
        assert!(!options.adaptive_threshold);

        // A field that was not overridden keeps its default value.
        assert_eq!(
            options.adaptive_window_size,
            PreprocessOptions::default().adaptive_window_size
        );
    }

    /// A `TextRegion` must survive a JSON round trip with every field intact.
    #[test]
    fn test_region_type_serialization() {
        let region = TextRegion {
            region_type: RegionType::Math,
            bbox: (10, 20, 100, 50),
            confidence: 0.95,
            text_height: 12.0,
            baseline_angle: 0.5,
        };

        let json = serde_json::to_string(&region).unwrap();
        let deserialized: TextRegion = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.region_type, RegionType::Math);
        assert_eq!(deserialized.bbox, (10, 20, 100, 50));
        assert!((deserialized.confidence - 0.95).abs() < 0.001);
        assert!((deserialized.text_height - 12.0).abs() < 0.001);
        assert!((deserialized.baseline_angle - 0.5).abs() < 0.001);
    }
}