oxify_connect_vision/
preprocessing.rs

1//! Image preprocessing utilities for OCR.
2//!
3//! This module provides image preprocessing operations to improve OCR accuracy:
4//! - Auto-resize large images
5//! - Noise reduction
6//! - Contrast enhancement
7//! - Deskewing
8//! - Border removal
9
10use crate::errors::{Result, VisionError};
11use image::{DynamicImage, GrayImage, Luma, Rgb};
12use imageproc::geometric_transformations::{rotate_about_center, Interpolation};
13
/// Maximum image dimension (width or height) for automatic resizing.
///
/// This is the `max_dimension` used by [`PreprocessConfig::default`],
/// [`PreprocessConfig::high_quality`] and [`PreprocessConfig::fast`].
pub const DEFAULT_MAX_DIMENSION: u32 = 4096;
16
/// Preprocessing configuration.
///
/// Each field switches one step of [`ImagePreprocessor::preprocess`] on or
/// off. The type is comparable (`PartialEq`/`Eq`) so that configurations can
/// be checked against presets and used in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PreprocessConfig {
    /// Auto-resize images whose width or height exceeds this value;
    /// `None` disables resizing.
    pub max_dimension: Option<u32>,
    /// Apply noise reduction
    pub denoise: bool,
    /// Apply contrast enhancement
    pub enhance_contrast: bool,
    /// Apply deskewing (rotation correction)
    pub deskew: bool,
    /// Remove borders
    pub remove_borders: bool,
    /// Convert to grayscale
    pub grayscale: bool,
}
33
34impl Default for PreprocessConfig {
35    fn default() -> Self {
36        Self {
37            max_dimension: Some(DEFAULT_MAX_DIMENSION),
38            denoise: false,
39            enhance_contrast: false,
40            deskew: false,
41            remove_borders: false,
42            grayscale: false,
43        }
44    }
45}
46
47impl PreprocessConfig {
48    /// Create a configuration for high-quality OCR preprocessing.
49    ///
50    /// Enables all preprocessing steps for maximum quality.
51    pub fn high_quality() -> Self {
52        Self {
53            max_dimension: Some(DEFAULT_MAX_DIMENSION),
54            denoise: true,
55            enhance_contrast: true,
56            deskew: true,
57            remove_borders: true,
58            grayscale: true,
59        }
60    }
61
62    /// Create a configuration for fast preprocessing.
63    ///
64    /// Only resizes images, skipping expensive operations.
65    pub fn fast() -> Self {
66        Self {
67            max_dimension: Some(DEFAULT_MAX_DIMENSION),
68            denoise: false,
69            enhance_contrast: false,
70            deskew: false,
71            remove_borders: false,
72            grayscale: false,
73        }
74    }
75
76    /// Create a minimal configuration (no preprocessing).
77    pub fn none() -> Self {
78        Self {
79            max_dimension: None,
80            denoise: false,
81            enhance_contrast: false,
82            deskew: false,
83            remove_borders: false,
84            grayscale: false,
85        }
86    }
87}
88
89/// Image preprocessor for OCR.
90pub struct ImagePreprocessor {
91    config: PreprocessConfig,
92}
93
94impl ImagePreprocessor {
95    /// Create a new preprocessor with the given configuration.
96    pub fn new(config: PreprocessConfig) -> Self {
97        Self { config }
98    }
99
100    /// Create a preprocessor with default settings.
101    pub fn default_config() -> Self {
102        Self {
103            config: PreprocessConfig::default(),
104        }
105    }
106
107    /// Preprocess an image for OCR.
108    ///
109    /// Applies configured preprocessing steps in optimal order.
110    pub fn preprocess(&self, mut image: DynamicImage) -> Result<DynamicImage> {
111        // Step 1: Auto-resize if needed
112        if let Some(max_dim) = self.config.max_dimension {
113            image = resize_if_needed(image, max_dim)?;
114        }
115
116        // Step 2: Convert to grayscale if requested
117        if self.config.grayscale {
118            image = DynamicImage::ImageLuma8(image.to_luma8());
119        }
120
121        // Step 3: Remove borders (before other processing)
122        if self.config.remove_borders {
123            image = remove_borders(image)?;
124        }
125
126        // Step 4: Deskew (rotation correction)
127        if self.config.deskew {
128            image = deskew_image(image)?;
129        }
130
131        // Step 5: Denoise
132        if self.config.denoise {
133            image = denoise_image(image)?;
134        }
135
136        // Step 6: Enhance contrast (last step for best results)
137        if self.config.enhance_contrast {
138            image = enhance_contrast(image)?;
139        }
140
141        Ok(image)
142    }
143
144    /// Preprocess image bytes directly.
145    ///
146    /// Decodes the image, preprocesses it, and returns the processed bytes.
147    pub fn preprocess_bytes(
148        &self,
149        image_data: &[u8],
150        format: image::ImageFormat,
151    ) -> Result<Vec<u8>> {
152        // Decode image
153        let image = image::load_from_memory(image_data)
154            .map_err(|e| VisionError::image_processing(format!("Failed to decode image: {}", e)))?;
155
156        // Preprocess
157        let processed = self.preprocess(image)?;
158
159        // Encode back to bytes
160        let mut output = Vec::new();
161        processed
162            .write_to(&mut std::io::Cursor::new(&mut output), format)
163            .map_err(|e| VisionError::image_processing(format!("Failed to encode image: {}", e)))?;
164
165        Ok(output)
166    }
167}
168
169/// Resize image if it exceeds the maximum dimension.
170///
171/// Maintains aspect ratio and only resizes if needed.
172pub fn resize_if_needed(image: DynamicImage, max_dimension: u32) -> Result<DynamicImage> {
173    let (width, height) = (image.width(), image.height());
174
175    // Check if resizing is needed
176    if width <= max_dimension && height <= max_dimension {
177        return Ok(image);
178    }
179
180    // Calculate new dimensions maintaining aspect ratio
181    let (new_width, new_height) = if width > height {
182        let scale = max_dimension as f32 / width as f32;
183        (max_dimension, (height as f32 * scale) as u32)
184    } else {
185        let scale = max_dimension as f32 / height as f32;
186        ((width as f32 * scale) as u32, max_dimension)
187    };
188
189    tracing::debug!(
190        "Resizing image from {}x{} to {}x{}",
191        width,
192        height,
193        new_width,
194        new_height
195    );
196
197    Ok(image.resize(new_width, new_height, image::imageops::FilterType::Lanczos3))
198}
199
200/// Apply noise reduction using median filter.
201///
202/// Reduces salt-and-pepper noise while preserving edges.
203pub fn denoise_image(image: DynamicImage) -> Result<DynamicImage> {
204    let gray = image.to_luma8();
205    let denoised = median_filter(&gray, 3);
206    Ok(DynamicImage::ImageLuma8(denoised))
207}
208
209/// Apply median filter to reduce noise.
210fn median_filter(image: &GrayImage, radius: u32) -> GrayImage {
211    let (width, height) = image.dimensions();
212    let mut output = GrayImage::new(width, height);
213
214    for y in 0..height {
215        for x in 0..width {
216            let mut values = Vec::new();
217
218            // Collect values in neighborhood
219            for dy in -(radius as i32)..=(radius as i32) {
220                for dx in -(radius as i32)..=(radius as i32) {
221                    let nx = (x as i32 + dx).clamp(0, width as i32 - 1) as u32;
222                    let ny = (y as i32 + dy).clamp(0, height as i32 - 1) as u32;
223                    values.push(image.get_pixel(nx, ny)[0]);
224                }
225            }
226
227            // Sort and take median
228            values.sort_unstable();
229            let median = values[values.len() / 2];
230            output.put_pixel(x, y, Luma([median]));
231        }
232    }
233
234    output
235}
236
237/// Enhance image contrast using histogram equalization.
238///
239/// Improves text visibility by stretching the intensity histogram.
240pub fn enhance_contrast(image: DynamicImage) -> Result<DynamicImage> {
241    let gray = image.to_luma8();
242    let equalized = histogram_equalization(&gray);
243    Ok(DynamicImage::ImageLuma8(equalized))
244}
245
246/// Apply histogram equalization to enhance contrast.
247fn histogram_equalization(image: &GrayImage) -> GrayImage {
248    let (width, height) = image.dimensions();
249    let total_pixels = (width * height) as f32;
250
251    // Calculate histogram
252    let mut histogram = [0u32; 256];
253    for pixel in image.pixels() {
254        histogram[pixel[0] as usize] += 1;
255    }
256
257    // Calculate cumulative distribution function (CDF)
258    let mut cdf = [0u32; 256];
259    cdf[0] = histogram[0];
260    for i in 1..256 {
261        cdf[i] = cdf[i - 1] + histogram[i];
262    }
263
264    // Normalize CDF to create lookup table
265    let cdf_min = *cdf.iter().find(|&&x| x > 0).unwrap_or(&0);
266    let mut lut = [0u8; 256];
267    for i in 0..256 {
268        let normalized =
269            ((cdf[i] - cdf_min) as f32 / (total_pixels - cdf_min as f32) * 255.0) as u8;
270        lut[i] = normalized;
271    }
272
273    // Apply lookup table
274    let mut output = GrayImage::new(width, height);
275    for (x, y, pixel) in image.enumerate_pixels() {
276        let new_value = lut[pixel[0] as usize];
277        output.put_pixel(x, y, Luma([new_value]));
278    }
279
280    output
281}
282
283/// Detect and correct image skew (rotation).
284///
285/// Uses edge detection and Hough transform approximation.
286pub fn deskew_image(image: DynamicImage) -> Result<DynamicImage> {
287    let angle = detect_skew_angle(&image.to_luma8());
288
289    // Only rotate if skew is significant (> 0.5 degrees)
290    if angle.abs() < 0.5 {
291        return Ok(image);
292    }
293
294    tracing::debug!("Deskewing image by {:.2} degrees", angle);
295
296    // Convert to RGB for rotation (grayscale rotation has issues)
297    let rgb_image = image.to_rgb8();
298    let rotated = rotate_about_center(
299        &rgb_image,
300        angle.to_radians(),
301        Interpolation::Bilinear,
302        Rgb([255u8, 255u8, 255u8]),
303    );
304
305    Ok(DynamicImage::ImageRgb8(rotated))
306}
307
308/// Detect skew angle using edge-based heuristic.
309///
310/// Returns the estimated rotation angle in degrees.
311fn detect_skew_angle(image: &GrayImage) -> f32 {
312    // Simplified skew detection using horizontal edge density
313    let (width, height) = image.dimensions();
314    let mut best_angle = 0.0f32;
315    let mut best_score = 0.0f32;
316
317    // Try angles from -10 to +10 degrees
318    for angle_deg in -10..=10 {
319        let angle = angle_deg as f32;
320        let mut score = 0.0f32;
321
322        // Sample horizontal lines
323        for y in (height / 4..height * 3 / 4).step_by(10) {
324            let mut edge_count = 0;
325            for x in 1..width {
326                let diff =
327                    (image.get_pixel(x, y)[0] as i32 - image.get_pixel(x - 1, y)[0] as i32).abs();
328                if diff > 30 {
329                    edge_count += 1;
330                }
331            }
332            score += edge_count as f32;
333        }
334
335        if score > best_score {
336            best_score = score;
337            best_angle = angle;
338        }
339    }
340
341    best_angle
342}
343
344/// Remove borders from an image.
345///
346/// Detects and crops white/blank borders around the content.
347pub fn remove_borders(image: DynamicImage) -> Result<DynamicImage> {
348    let gray = image.to_luma8();
349    let (width, height) = gray.dimensions();
350
351    // Find content bounds
352    let mut min_x = width;
353    let mut max_x = 0;
354    let mut min_y = height;
355    let mut max_y = 0;
356
357    // Threshold for detecting content (non-white pixels)
358    let threshold = 240u8;
359
360    for y in 0..height {
361        for x in 0..width {
362            if gray.get_pixel(x, y)[0] < threshold {
363                min_x = min_x.min(x);
364                max_x = max_x.max(x);
365                min_y = min_y.min(y);
366                max_y = max_y.max(y);
367            }
368        }
369    }
370
371    // If no content found, return original
372    if min_x >= max_x || min_y >= max_y {
373        return Ok(image);
374    }
375
376    // Add small padding
377    let padding = 10;
378    min_x = min_x.saturating_sub(padding);
379    min_y = min_y.saturating_sub(padding);
380    max_x = (max_x + padding).min(width - 1);
381    max_y = (max_y + padding).min(height - 1);
382
383    tracing::debug!(
384        "Removing borders: cropping to ({}, {}) -> ({}, {})",
385        min_x,
386        min_y,
387        max_x,
388        max_y
389    );
390
391    // Crop image
392    let crop_width = max_x - min_x + 1;
393    let crop_height = max_y - min_y + 1;
394    Ok(image.crop_imm(min_x, min_y, crop_width, crop_height))
395}
396
397/// Apply adaptive thresholding for binarization.
398///
399/// Converts image to black and white, useful for text extraction.
400#[allow(dead_code)]
401pub fn adaptive_threshold(image: &GrayImage, window_size: u32) -> GrayImage {
402    let (width, height) = image.dimensions();
403    let mut output = GrayImage::new(width, height);
404    let half_window = window_size / 2;
405
406    for y in 0..height {
407        for x in 0..width {
408            // Calculate local mean
409            let mut sum = 0u32;
410            let mut count = 0u32;
411
412            for dy in -(half_window as i32)..=(half_window as i32) {
413                for dx in -(half_window as i32)..=(half_window as i32) {
414                    let nx = (x as i32 + dx).clamp(0, width as i32 - 1) as u32;
415                    let ny = (y as i32 + dy).clamp(0, height as i32 - 1) as u32;
416                    sum += image.get_pixel(nx, ny)[0] as u32;
417                    count += 1;
418                }
419            }
420
421            let mean = sum / count;
422            let pixel_value = image.get_pixel(x, y)[0] as u32;
423
424            // Threshold with small bias
425            let threshold = mean.saturating_sub(5);
426            let new_value = if pixel_value < threshold { 0 } else { 255 };
427            output.put_pixel(x, y, Luma([new_value]));
428        }
429    }
430
431    output
432}
433
/// Unit tests for the preprocessing steps.
///
/// Besides smoke checks on output dimensions, these pin concrete values
/// (preset contents, crop sizes, filter results) so regressions in the
/// individual steps are detected.
#[cfg(test)]
mod tests {
    use super::*;
    use image::RgbImage;

    /// Builds an RGB image with a diagonal gray gradient: `(x + y) % 255`.
    fn create_test_image(width: u32, height: u32) -> DynamicImage {
        let img = RgbImage::from_fn(width, height, |x, y| {
            let val = ((x + y) % 255) as u8;
            Rgb([val, val, val])
        });
        DynamicImage::ImageRgb8(img)
    }

    #[test]
    fn test_preprocess_config_default() {
        let config = PreprocessConfig::default();
        assert_eq!(config.max_dimension, Some(DEFAULT_MAX_DIMENSION));
        assert!(!config.denoise);
        assert!(!config.enhance_contrast);
        assert!(!config.deskew);
        assert!(!config.remove_borders);
        assert!(!config.grayscale);
    }

    #[test]
    fn test_preprocess_config_high_quality() {
        let config = PreprocessConfig::high_quality();
        assert_eq!(config.max_dimension, Some(DEFAULT_MAX_DIMENSION));
        assert!(config.denoise);
        assert!(config.enhance_contrast);
        assert!(config.deskew);
        assert!(config.remove_borders);
        assert!(config.grayscale);
    }

    #[test]
    fn test_preprocess_config_fast() {
        let config = PreprocessConfig::fast();
        assert_eq!(config.max_dimension, Some(DEFAULT_MAX_DIMENSION));
        assert!(!config.denoise);
        assert!(!config.enhance_contrast);
        assert!(!config.deskew);
        assert!(!config.remove_borders);
        assert!(!config.grayscale);
    }

    #[test]
    fn test_preprocess_config_none() {
        let config = PreprocessConfig::none();
        assert_eq!(config.max_dimension, None);
        assert!(!config.denoise);
        assert!(!config.enhance_contrast);
        assert!(!config.deskew);
        assert!(!config.remove_borders);
        assert!(!config.grayscale);
    }

    #[test]
    fn test_preprocessor_creation() {
        let preprocessor = ImagePreprocessor::default_config();
        assert_eq!(
            preprocessor.config.max_dimension,
            Some(DEFAULT_MAX_DIMENSION)
        );
        assert!(!preprocessor.config.denoise);
    }

    #[test]
    fn test_resize_if_needed_no_resize() {
        let image = create_test_image(1000, 1000);
        let result = resize_if_needed(image.clone(), 2000).unwrap();
        assert_eq!(result.width(), 1000);
        assert_eq!(result.height(), 1000);
    }

    #[test]
    fn test_resize_if_needed_resize() {
        // Optimized test with smaller image for fast execution
        let image = create_test_image(2000, 1200);
        let original_width = image.width();
        let original_height = image.height();
        let result = resize_if_needed(image, 1600).unwrap();
        // 2000x1200 scaled by 0.8 is exactly 1600x960.
        assert_eq!(result.width(), 1600);
        assert_eq!(result.height(), 960);
        // Verify aspect ratio is maintained
        let aspect_ratio = original_width as f32 / original_height as f32;
        let result_ratio = result.width() as f32 / result.height() as f32;
        assert!((aspect_ratio - result_ratio).abs() < 0.01);
    }

    #[test]
    #[ignore]
    fn test_resize_if_needed_resize_large() {
        // Slow comprehensive test with large image (84s+)
        // Run with: cargo test test_resize_if_needed_resize_large -- --ignored
        let image = create_test_image(5000, 3000);
        let result = resize_if_needed(image, 4000).unwrap();
        assert!(result.width() <= 4000);
        assert!(result.height() <= 4000);
    }

    #[test]
    fn test_denoise_image() {
        let image = create_test_image(100, 100);
        let result = denoise_image(image).unwrap();
        assert_eq!((result.width(), result.height()), (100, 100));
    }

    #[test]
    fn test_enhance_contrast() {
        let image = create_test_image(100, 100);
        let result = enhance_contrast(image).unwrap();
        assert_eq!((result.width(), result.height()), (100, 100));
    }

    #[test]
    fn test_deskew_image() {
        let image = create_test_image(100, 100);
        let result = deskew_image(image).unwrap();
        // Rotation about the center keeps the canvas size.
        assert_eq!((result.width(), result.height()), (100, 100));
    }

    #[test]
    fn test_remove_borders() {
        let image = create_test_image(100, 100);
        let result = remove_borders(image).unwrap();
        assert!(result.width() > 0);
        assert!(result.height() > 0);
    }

    #[test]
    fn test_remove_borders_crops_to_padded_content() {
        // Black 30x10 rectangle at x in 30..60, y in 40..50 on a white page.
        let page = RgbImage::from_fn(100, 100, |x, y| {
            if (30..60).contains(&x) && (40..50).contains(&y) {
                Rgb([0u8, 0u8, 0u8])
            } else {
                Rgb([255u8, 255u8, 255u8])
            }
        });
        let result = remove_borders(DynamicImage::ImageRgb8(page)).unwrap();
        // Content plus 10 pixels of padding on every side.
        assert_eq!((result.width(), result.height()), (50, 30));
    }

    #[test]
    fn test_remove_borders_blank_page_unchanged() {
        let page = RgbImage::from_fn(50, 40, |_, _| Rgb([255u8, 255u8, 255u8]));
        let result = remove_borders(DynamicImage::ImageRgb8(page)).unwrap();
        assert_eq!((result.width(), result.height()), (50, 40));
    }

    #[test]
    fn test_preprocessor_preprocess() {
        let image = create_test_image(200, 200);
        let config = PreprocessConfig::default();
        let preprocessor = ImagePreprocessor::new(config);
        let result = preprocessor.preprocess(image).unwrap();
        // The default configuration only caps the size, which 200x200 already satisfies.
        assert_eq!((result.width(), result.height()), (200, 200));
    }

    #[test]
    fn test_median_filter() {
        let gray = GrayImage::from_fn(50, 50, |x, y| Luma([((x + y) % 255) as u8]));
        let filtered = median_filter(&gray, 2);
        assert_eq!(filtered.dimensions(), gray.dimensions());
    }

    #[test]
    fn test_median_filter_removes_isolated_outlier() {
        // A single bright pixel on a flat background is outvoted in every 3x3 window.
        let gray = GrayImage::from_fn(20, 20, |x, y| {
            Luma([if (x, y) == (10, 10) { 255u8 } else { 100u8 }])
        });
        let filtered = median_filter(&gray, 1);
        assert!(filtered.pixels().all(|p| p[0] == 100));
    }

    #[test]
    fn test_histogram_equalization() {
        let gray = GrayImage::from_fn(50, 50, |x, y| Luma([((x + y) % 128) as u8]));
        let equalized = histogram_equalization(&gray);
        assert_eq!(equalized.dimensions(), gray.dimensions());
        // Input spans 0..=98; the darkest value maps to 0 and the brightest to 255.
        assert_eq!(equalized.get_pixel(0, 0)[0], 0);
        assert_eq!(equalized.get_pixel(49, 49)[0], 255);
    }

    #[test]
    fn test_detect_skew_angle() {
        let gray = GrayImage::from_fn(100, 100, |x, y| Luma([((x + y) % 255) as u8]));
        let angle = detect_skew_angle(&gray);
        assert!(angle.abs() <= 10.0);
    }

    #[test]
    fn test_detect_skew_angle_featureless_image() {
        // No edges at all: there is nothing to correct.
        let gray = GrayImage::from_fn(100, 100, |_, _| Luma([128u8]));
        assert_eq!(detect_skew_angle(&gray), 0.0);
    }

    #[test]
    fn test_adaptive_threshold() {
        let gray = GrayImage::from_fn(50, 50, |x, y| Luma([((x + y) % 255) as u8]));
        let thresholded = adaptive_threshold(&gray, 11);
        assert_eq!(thresholded.dimensions(), gray.dimensions());
        // Output is strictly binary.
        assert!(thresholded.pixels().all(|p| p[0] == 0 || p[0] == 255));
    }
}