oar_ocr_core/domain/adapters/preprocessing.rs

//! Shared preprocessing helpers for model adapters.
//!
//! This module centralizes common preprocessing operations to reduce code duplication
//! across model implementations. It provides:
//! - Configuration helpers for common model types
//! - Reusable preprocessing pipelines for common patterns
//! - Utility functions for image format conversions
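//!
//! # Example
//!
//! A minimal usage sketch of the config helpers below; the `(224, 224)` input
//! shape is an illustrative value, not a prescribed default:
//!
//! ```rust,no_run
//! // let det_cfg = db_preprocess_for_text_type(Some("general"));
//! // let cls_cfg = pp_lcnet_preprocess((224, 224));
//! ```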

use crate::core::{OCRError, Tensor4D};
use crate::models::classification::PPLCNetPreprocessConfig;
use crate::models::detection::db::DBPreprocessConfig;
use crate::processors::{DetResizeForTest, ImageScaleInfo, LimitType, NormalizeImage, OCRResize};
use image::{DynamicImage, RgbImage};

/// Construct a PP-LCNet preprocessing config with a custom input shape.
///
/// Leaves other fields at their `Default` values so adapters can override only
/// what they need (e.g., normalization statistics).
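///
/// # Example
///
/// ```rust,no_run
/// // A sketch; the (224, 224) input shape is an illustrative value.
/// // let config = pp_lcnet_preprocess((224, 224));
/// // assert_eq!(config.input_shape, (224, 224));
/// ```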
pub fn pp_lcnet_preprocess(input_shape: (u32, u32)) -> PPLCNetPreprocessConfig {
    PPLCNetPreprocessConfig {
        input_shape,
        ..Default::default()
    }
}

/// Construct a PP-LCNet preprocessing config with custom normalization stats.
///
/// Useful for adapters that expect zero-centered inputs but otherwise rely on
/// the standard defaults.
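///
/// # Example
///
/// ```rust,no_run
/// // A sketch with zero-centered statistics; the values are illustrative only.
/// // let config = pp_lcnet_preprocess_with_norm((224, 224), [0.5; 3], [0.5; 3]);
/// ```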
pub fn pp_lcnet_preprocess_with_norm(
    input_shape: (u32, u32),
    mean: [f32; 3],
    std: [f32; 3],
) -> PPLCNetPreprocessConfig {
    let mut config = pp_lcnet_preprocess(input_shape);
    config.normalize_mean = mean.to_vec();
    config.normalize_std = std.to_vec();
    config
}

/// Construct a DB preprocessing config that limits images by side length.
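///
/// # Example
///
/// ```rust,no_run
/// // A sketch; 960 matches the "general" text default used elsewhere in this module.
/// // let config = db_preprocess_with_limit_side_len(960);
/// ```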
pub fn db_preprocess_with_limit_side_len(limit_side_len: u32) -> DBPreprocessConfig {
    DBPreprocessConfig {
        limit_side_len: Some(limit_side_len),
        ..Default::default()
    }
}

/// Construct a DB preprocessing config that resizes by long edge.
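///
/// # Example
///
/// ```rust,no_run
/// // A sketch; the 960 long-edge target is an illustrative value.
/// // let config = db_preprocess_with_resize_long(960);
/// ```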
pub fn db_preprocess_with_resize_long(resize_long: u32) -> DBPreprocessConfig {
    DBPreprocessConfig {
        resize_long: Some(resize_long),
        ..Default::default()
    }
}

/// Construct a DB preprocessing config based on text type.
///
/// This function provides default preprocessing configurations:
/// - "general" (default): limit_side_len=960, limit_type=Max, max_side_limit=4000
/// - "seal": limit_side_len=736, limit_type=Min, max_side_limit=4000
///
/// Note: PP-StructureV3's overall OCR uses different defaults (736/min for all text).
/// Those defaults are applied in `OARStructureBuilder`, not here.
///
/// # Arguments
///
/// * `text_type` - Optional text type string ("general", "seal", etc.)
///
/// # Returns
///
/// DBPreprocessConfig configured for the specified text type
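///
/// # Example
///
/// ```rust,no_run
/// // A sketch of the two documented branches.
/// // let seal = db_preprocess_for_text_type(Some("seal"));  // 736 / Min / 4000
/// // let general = db_preprocess_for_text_type(None);       // 960 / Max / 4000
/// ```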
pub fn db_preprocess_for_text_type(text_type: Option<&str>) -> DBPreprocessConfig {
    match text_type {
        Some("seal") => DBPreprocessConfig {
            limit_side_len: Some(736),
            limit_type: Some(LimitType::Min),
            max_side_limit: Some(4000),
            ..Default::default()
        },
        _ => {
            // Default to "general" text configuration
            DBPreprocessConfig {
                limit_side_len: Some(960),
                limit_type: Some(LimitType::Max),
                max_side_limit: Some(4000),
                ..Default::default()
            }
        }
    }
}

/// Converts a batch of RGB images to dynamic images.
///
/// This is a common operation needed before most preprocessing steps.
///
/// # Arguments
///
/// * `images` - Vector of RGB images to convert
///
/// # Returns
///
/// Vector of dynamic images
///
/// # Example
///
/// ```rust,no_run
/// // let rgb_images: Vec<RgbImage> = load_images();
/// // let dynamic_images = rgb_to_dynamic(rgb_images);
/// ```
#[inline]
pub fn rgb_to_dynamic(images: Vec<RgbImage>) -> Vec<DynamicImage> {
    images.into_iter().map(DynamicImage::ImageRgb8).collect()
}

/// Applies a resizer and then normalizes the result to a tensor.
///
/// This is a common pattern used in recognition models like CRNN.
///
/// # Arguments
///
/// * `images` - Input RGB images
/// * `resizer` - Resizer implementing the `ResizeOperation` trait
/// * `normalizer` - Normalizer to convert to tensor
///
/// # Returns
///
/// Preprocessed 4D tensor ready for inference
///
/// # Example
///
/// ```rust,no_run
/// // let tensor = resize_and_normalize(
/// //     images,
/// //     &self.resizer,
/// //     &self.normalizer
/// // )?;
/// ```
pub fn resize_and_normalize<R>(
    images: Vec<RgbImage>,
    resizer: &R,
    normalizer: &NormalizeImage,
) -> Result<Tensor4D, OCRError>
where
    R: ResizeOperation,
{
    let resized_images = resizer.resize(images)?;
    let dynamic_images = rgb_to_dynamic(resized_images);
    normalizer.normalize_batch_to(dynamic_images)
}

/// Applies a detection resizer (with scale info) and then normalizes the result.
///
/// This is a common pattern used in detection models like DB and RT-DETR.
///
/// # Arguments
///
/// * `images` - Input RGB images
/// * `resizer` - Detection resizer that returns scale info
/// * `normalizer` - Normalizer to convert to tensor
///
/// # Returns
///
/// Tuple of (preprocessed tensor, scale information for each image)
///
/// # Example
///
/// ```rust,no_run
/// // let (tensor, scales) = detection_resize_and_normalize(
/// //     images,
/// //     &self.resizer,
/// //     &self.normalizer,
/// // )?;
/// ```
pub fn detection_resize_and_normalize<R>(
    images: Vec<RgbImage>,
    resizer: &R,
    normalizer: &NormalizeImage,
) -> Result<(Tensor4D, Vec<ImageScaleInfo>), OCRError>
where
    R: DetectionResizeOperation,
{
    let dynamic_images = rgb_to_dynamic(images);
    let (resized_images, scale_info) = resizer.resize_with_scale(dynamic_images)?;
    let tensor = normalizer.normalize_batch_to(resized_images)?;
    Ok((tensor, scale_info))
}

/// Trait for resize operations that return only resized images.
///
/// Used for simple resize operations in recognition models.
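///
/// # Example
///
/// A sketch of a custom implementor (`FixedResizer` and its `target` field are
/// hypothetical names, not part of this crate):
///
/// ```rust,no_run
/// // struct FixedResizer { target: (u32, u32) }
/// //
/// // impl ResizeOperation for FixedResizer {
/// //     fn resize(&self, images: Vec<RgbImage>) -> Result<Vec<RgbImage>, OCRError> {
/// //         Ok(images
/// //             .into_iter()
/// //             .map(|img| image::imageops::resize(
/// //                 &img,
/// //                 self.target.0,
/// //                 self.target.1,
/// //                 image::imageops::FilterType::Triangle,
/// //             ))
/// //             .collect())
/// //     }
/// // }
/// ```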
pub trait ResizeOperation {
    /// Resizes a batch of images.
    fn resize(&self, images: Vec<RgbImage>) -> Result<Vec<RgbImage>, OCRError>;
}

/// Trait for detection resize operations that return scale information.
///
/// Used for detection models that need to map predictions back to original coordinates.
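///
/// See `DetectionResizer` below for the provided implementation that wraps
/// `DetResizeForTest`.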
pub trait DetectionResizeOperation {
    /// Resizes a batch of images and returns scale information.
    fn resize_with_scale(
        &self,
        images: Vec<DynamicImage>,
    ) -> Result<(Vec<DynamicImage>, Vec<ImageScaleInfo>), OCRError>;
}

/// Builder for common preprocessing pipelines.
///
/// Provides a fluent interface for constructing preprocessing operations
/// without duplicating code across adapters.
///
/// # Example
///
/// ```rust,no_run
/// // let tensor = PreprocessPipelineBuilder::new()
/// //     .rgb_images(images)
/// //     .resize(&resizer)?
/// //     .normalize(&normalizer)?;
/// ```
pub struct PreprocessPipelineBuilder {
    images: Option<Vec<RgbImage>>,
    dynamic_images: Option<Vec<DynamicImage>>,
}

impl PreprocessPipelineBuilder {
    /// Creates a new preprocessing pipeline builder.
    pub fn new() -> Self {
        Self {
            images: None,
            dynamic_images: None,
        }
    }

    /// Sets the input RGB images.
    pub fn rgb_images(mut self, images: Vec<RgbImage>) -> Self {
        self.images = Some(images);
        self
    }

    /// Sets the input dynamic images.
    pub fn dynamic_images(mut self, images: Vec<DynamicImage>) -> Self {
        self.dynamic_images = Some(images);
        self
    }

    /// Converts RGB images to dynamic images.
    pub fn to_dynamic(mut self) -> Self {
        if let Some(images) = self.images.take() {
            self.dynamic_images = Some(rgb_to_dynamic(images));
        }
        self
    }

    /// Applies a resize operation.
    pub fn resize<R>(mut self, resizer: &R) -> Result<Self, OCRError>
    where
        R: ResizeOperation,
    {
        if let Some(images) = self.images.take() {
            let resized = resizer.resize(images)?;
            self.images = Some(resized);
        }
        Ok(self)
    }

    /// Normalizes images to a tensor.
    pub fn normalize(self, normalizer: &NormalizeImage) -> Result<Tensor4D, OCRError> {
        let dynamic_images = if let Some(images) = self.images {
            rgb_to_dynamic(images)
        } else if let Some(images) = self.dynamic_images {
            images
        } else {
            return Err(OCRError::InvalidInput {
                message: "No images provided to preprocessing pipeline".to_string(),
            });
        };

        normalizer.normalize_batch_to(dynamic_images)
    }

    /// Builds the final tensor (alias for `normalize`, kept for naming consistency).
    pub fn build(self, normalizer: &NormalizeImage) -> Result<Tensor4D, OCRError> {
        self.normalize(normalizer)
    }
}

impl Default for PreprocessPipelineBuilder {
    fn default() -> Self {
        Self::new()
    }
}

/// Implements `ResizeOperation` for `OCRResize` (recognition models).
impl ResizeOperation for OCRResize {
    fn resize(&self, images: Vec<RgbImage>) -> Result<Vec<RgbImage>, OCRError> {
        self.apply(&images)
    }
}

/// Wrapper for DetResizeForTest to implement DetectionResizeOperation.
///
/// This allows DetResizeForTest to be used in the common preprocessing pipeline.
pub struct DetectionResizer<'a> {
    resizer: &'a DetResizeForTest,
}

impl<'a> DetectionResizer<'a> {
    /// Creates a new detection resizer wrapper.
    pub fn new(resizer: &'a DetResizeForTest) -> Self {
        Self { resizer }
    }
}

impl<'a> DetectionResizeOperation for DetectionResizer<'a> {
    fn resize_with_scale(
        &self,
        images: Vec<DynamicImage>,
    ) -> Result<(Vec<DynamicImage>, Vec<ImageScaleInfo>), OCRError> {
        let (resized, scales) = self.resizer.apply(images, None, None, None);
        Ok((resized, scales))
    }
}

/// Convenience function to wrap a DetResizeForTest for use in preprocessing pipelines.
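///
/// # Example
///
/// ```rust,no_run
/// // A sketch; `self.resizer` and `self.normalizer` stand for adapter fields.
/// // let det_resizer = wrap_detection_resizer(&self.resizer);
/// // let (tensor, scales) =
/// //     detection_resize_and_normalize(images, &det_resizer, &self.normalizer)?;
/// ```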
#[inline]
pub fn wrap_detection_resizer(resizer: &DetResizeForTest) -> DetectionResizer<'_> {
    DetectionResizer::new(resizer)
}