oar_ocr_core/domain/adapters/preprocessing.rs
//! Shared preprocessing helpers for model adapters.
//!
//! This module centralizes common preprocessing operations to reduce code duplication
//! across model implementations. It provides:
//! - Configuration helpers for common model types
//! - Reusable preprocessing pipelines for common patterns
//! - Utility functions for image format conversions

use crate::core::{OCRError, Tensor4D};
use crate::models::classification::PPLCNetPreprocessConfig;
use crate::models::detection::db::DBPreprocessConfig;
use crate::processors::{ImageScaleInfo, LimitType, NormalizeImage};
use image::{DynamicImage, RgbImage};

/// Construct a PP-LCNet preprocessing config with a custom input shape.
///
/// Leaves other fields at their `Default` values so adapters can override only
/// what they need (e.g., normalization statistics).
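///
/// # Example
///
/// A minimal sketch; the `224 x 224` shape below is illustrative, not a model requirement.
///
/// ```rust,no_run
/// // let config = pp_lcnet_preprocess((224, 224));
/// ```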
pub fn pp_lcnet_preprocess(input_shape: (u32, u32)) -> PPLCNetPreprocessConfig {
    PPLCNetPreprocessConfig {
        input_shape,
        ..Default::default()
    }
}

/// Construct a PP-LCNet preprocessing config with custom normalization stats.
///
/// Useful for adapters that expect zero-centered inputs but otherwise rely on
/// the standard defaults.
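///
/// # Example
///
/// A minimal sketch; the zero-centered statistics below are illustrative only.
///
/// ```rust,no_run
/// // let config = pp_lcnet_preprocess_with_norm(
/// //     (224, 224),
/// //     [0.5, 0.5, 0.5],
/// //     [0.5, 0.5, 0.5],
/// // );
/// ```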
pub fn pp_lcnet_preprocess_with_norm(
    input_shape: (u32, u32),
    mean: [f32; 3],
    std: [f32; 3],
) -> PPLCNetPreprocessConfig {
    let mut config = pp_lcnet_preprocess(input_shape);
    config.normalize_mean = mean.to_vec();
    config.normalize_std = std.to_vec();
    config
}

/// Construct a DB preprocessing config that limits images by side length.
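///
/// # Example
///
/// A minimal sketch; `960` matches the "general" text default used elsewhere in this module.
///
/// ```rust,no_run
/// // let config = db_preprocess_with_limit_side_len(960);
/// ```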
pub fn db_preprocess_with_limit_side_len(limit_side_len: u32) -> DBPreprocessConfig {
    DBPreprocessConfig {
        limit_side_len: Some(limit_side_len),
        ..Default::default()
    }
}

/// Construct a DB preprocessing config that resizes by long edge.
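///
/// # Example
///
/// A minimal sketch with an illustrative long-edge target.
///
/// ```rust,no_run
/// // let config = db_preprocess_with_resize_long(960);
/// ```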
pub fn db_preprocess_with_resize_long(resize_long: u32) -> DBPreprocessConfig {
    DBPreprocessConfig {
        resize_long: Some(resize_long),
        ..Default::default()
    }
}

/// Construct a DB preprocessing config based on text type.
///
/// This function provides default preprocessing configurations:
/// - "general" (default): limit_side_len=960, limit_type=Max, max_side_limit=4000
/// - "seal": limit_side_len=736, limit_type=Min, max_side_limit=4000
///
/// Note: PP-StructureV3's overall OCR uses different defaults (736/min for all text).
/// Those defaults are applied in `OARStructureBuilder`, not here.
///
/// # Arguments
///
/// * `text_type` - Optional text type string ("general", "seal", etc.)
///
/// # Returns
///
/// DBPreprocessConfig configured for the specified text type
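///
/// # Example
///
/// A minimal sketch of the two configurations produced below.
///
/// ```rust,no_run
/// // let seal = db_preprocess_for_text_type(Some("seal")); // 736 / Min
/// // let general = db_preprocess_for_text_type(None);      // 960 / Max
/// ```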
pub fn db_preprocess_for_text_type(text_type: Option<&str>) -> DBPreprocessConfig {
    match text_type {
        Some("seal") => DBPreprocessConfig {
            limit_side_len: Some(736),
            limit_type: Some(LimitType::Min),
            max_side_limit: Some(4000),
            ..Default::default()
        },
        _ => {
            // Default to "general" text configuration
            DBPreprocessConfig {
                limit_side_len: Some(960),
                limit_type: Some(LimitType::Max),
                max_side_limit: Some(4000),
                ..Default::default()
            }
        }
    }
}

/// Converts a batch of RGB images to dynamic images.
///
/// This is a common operation needed before most preprocessing steps.
///
/// # Arguments
///
/// * `images` - Vector of RGB images to convert
///
/// # Returns
///
/// Vector of dynamic images
///
/// # Example
///
/// ```rust,no_run
/// // let rgb_images: Vec<RgbImage> = load_images();
/// // let dynamic_images = rgb_to_dynamic(rgb_images);
/// ```
#[inline]
pub fn rgb_to_dynamic(images: Vec<RgbImage>) -> Vec<DynamicImage> {
    images.into_iter().map(DynamicImage::ImageRgb8).collect()
}

/// Applies a resizer and then normalizes the result to a tensor.
///
/// This is a common pattern used in recognition models like CRNN.
///
/// # Arguments
///
/// * `images` - Input RGB images
/// * `resizer` - Resizer implementing [`ResizeOperation`]
/// * `normalizer` - Normalizer to convert to tensor
///
/// # Returns
///
/// Preprocessed 4D tensor ready for inference
///
/// # Example
///
/// ```rust,no_run
/// // let tensor = resize_and_normalize(
/// //     images,
/// //     &self.resizer,
/// //     &self.normalizer
/// // )?;
/// ```
pub fn resize_and_normalize<R>(
    images: Vec<RgbImage>,
    resizer: &R,
    normalizer: &NormalizeImage,
) -> Result<Tensor4D, OCRError>
where
    R: ResizeOperation,
{
    let resized_images = resizer.resize(images)?;
    let dynamic_images = rgb_to_dynamic(resized_images);
    normalizer.normalize_batch_to(dynamic_images)
}

/// Applies a detection resizer (with scale info) and then normalizes the result.
///
/// This is a common pattern used in detection models like DB and RT-DETR.
///
/// # Arguments
///
/// * `images` - Input RGB images
/// * `resizer` - Detection resizer that returns scale info
/// * `normalizer` - Normalizer to convert to tensor
///
/// # Returns
///
/// Tuple of (preprocessed tensor, scale information for each image)
///
/// # Example
///
/// ```rust,no_run
/// // let (tensor, scales) = detection_resize_and_normalize(
/// //     images,
/// //     &self.resizer,
/// //     &self.normalizer,
/// // )?;
/// ```
pub fn detection_resize_and_normalize<R>(
    images: Vec<RgbImage>,
    resizer: &R,
    normalizer: &NormalizeImage,
) -> Result<(Tensor4D, Vec<ImageScaleInfo>), OCRError>
where
    R: DetectionResizeOperation,
{
    let dynamic_images = rgb_to_dynamic(images);
    let (resized_images, scale_info) = resizer.resize_with_scale(dynamic_images)?;
    let tensor = normalizer.normalize_batch_to(resized_images)?;
    Ok((tensor, scale_info))
}

/// Trait for resize operations that return only resized images.
///
/// Used for simple resize operations in recognition models.
pub trait ResizeOperation {
    /// Resizes a batch of images.
    fn resize(&self, images: Vec<RgbImage>) -> Result<Vec<RgbImage>, OCRError>;
}

/// Trait for detection resize operations that return scale information.
///
/// Used for detection models that need to map predictions back to original coordinates.
pub trait DetectionResizeOperation {
    /// Resizes a batch of images and returns scale information.
    fn resize_with_scale(
        &self,
        images: Vec<DynamicImage>,
    ) -> Result<(Vec<DynamicImage>, Vec<ImageScaleInfo>), OCRError>;
}

/// Builder for common preprocessing pipelines.
///
/// Provides a fluent interface for constructing preprocessing operations
/// without duplicating code across adapters.
///
/// # Example
///
/// ```rust,no_run
/// // let tensor = PreprocessPipelineBuilder::new()
/// //     .rgb_images(images)
/// //     .resize(&resizer)?
/// //     .normalize(&normalizer)?;
/// ```
pub struct PreprocessPipelineBuilder {
    images: Option<Vec<RgbImage>>,
    dynamic_images: Option<Vec<DynamicImage>>,
}

impl PreprocessPipelineBuilder {
    /// Creates a new preprocessing pipeline builder.
    pub fn new() -> Self {
        Self {
            images: None,
            dynamic_images: None,
        }
    }

    /// Sets the input RGB images.
    pub fn rgb_images(mut self, images: Vec<RgbImage>) -> Self {
        self.images = Some(images);
        self
    }

    /// Sets the input dynamic images.
    pub fn dynamic_images(mut self, images: Vec<DynamicImage>) -> Self {
        self.dynamic_images = Some(images);
        self
    }

    /// Converts RGB images to dynamic images.
    pub fn to_dynamic(mut self) -> Self {
        if let Some(images) = self.images.take() {
            self.dynamic_images = Some(rgb_to_dynamic(images));
        }
        self
    }

    /// Applies a resize operation to the RGB images set via [`rgb_images`](Self::rgb_images).
    ///
    /// Has no effect when only dynamic images have been provided.
    pub fn resize<R>(mut self, resizer: &R) -> Result<Self, OCRError>
    where
        R: ResizeOperation,
    {
        if let Some(images) = self.images.take() {
            let resized = resizer.resize(images)?;
            self.images = Some(resized);
        }
        Ok(self)
    }

    /// Normalizes images to a tensor.
    pub fn normalize(self, normalizer: &NormalizeImage) -> Result<Tensor4D, OCRError> {
        let dynamic_images = if let Some(images) = self.images {
            rgb_to_dynamic(images)
        } else if let Some(images) = self.dynamic_images {
            images
        } else {
            return Err(OCRError::InvalidInput {
                message: "No images provided to preprocessing pipeline".to_string(),
            });
        };

        normalizer.normalize_batch_to(dynamic_images)
    }

    /// Builds the final tensor (alias for [`normalize`](Self::normalize), provided for naming consistency).
    pub fn build(self, normalizer: &NormalizeImage) -> Result<Tensor4D, OCRError> {
        self.normalize(normalizer)
    }
}

impl Default for PreprocessPipelineBuilder {
    fn default() -> Self {
        Self::new()
    }
}

use crate::processors::{DetResizeForTest, OCRResize};

/// Implements [`ResizeOperation`] for `OCRResize` (recognition models).
impl ResizeOperation for OCRResize {
    fn resize(&self, images: Vec<RgbImage>) -> Result<Vec<RgbImage>, OCRError> {
        self.apply(&images)
    }
}

/// Wrapper for `DetResizeForTest` to implement [`DetectionResizeOperation`].
///
/// This allows `DetResizeForTest` to be used in the common preprocessing pipeline.
pub struct DetectionResizer<'a> {
    resizer: &'a DetResizeForTest,
}

impl<'a> DetectionResizer<'a> {
    /// Creates a new detection resizer wrapper.
    pub fn new(resizer: &'a DetResizeForTest) -> Self {
        Self { resizer }
    }
}

impl<'a> DetectionResizeOperation for DetectionResizer<'a> {
    fn resize_with_scale(
        &self,
        images: Vec<DynamicImage>,
    ) -> Result<(Vec<DynamicImage>, Vec<ImageScaleInfo>), OCRError> {
        let (resized, scales) = self.resizer.apply(images, None, None, None);
        Ok((resized, scales))
    }
}

/// Convenience function to wrap a `DetResizeForTest` for use in preprocessing pipelines.
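///
/// # Example
///
/// A minimal sketch; `self.det_resizer` and `self.normalizer` are assumed adapter fields
/// and are illustrative only.
///
/// ```rust,no_run
/// // let (tensor, scales) = detection_resize_and_normalize(
/// //     images,
/// //     &wrap_detection_resizer(&self.det_resizer),
/// //     &self.normalizer,
/// // )?;
/// ```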
#[inline]
pub fn wrap_detection_resizer(resizer: &DetResizeForTest) -> DetectionResizer<'_> {
    DetectionResizer::new(resizer)
}
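
#[cfg(test)]
mod tests {
    use super::*;

    // Sanity checks for the text-type defaults above. They only assert the literal
    // values set in `db_preprocess_for_text_type` and assume `LimitType` is a plain
    // enum whose variants can be used in patterns, as its usage above suggests.

    #[test]
    fn general_text_type_uses_max_limit() {
        let config = db_preprocess_for_text_type(None);
        assert_eq!(config.limit_side_len, Some(960));
        assert!(matches!(config.limit_type, Some(LimitType::Max)));
        assert_eq!(config.max_side_limit, Some(4000));
    }

    #[test]
    fn seal_text_type_uses_min_limit() {
        let config = db_preprocess_for_text_type(Some("seal"));
        assert_eq!(config.limit_side_len, Some(736));
        assert!(matches!(config.limit_type, Some(LimitType::Min)));
        assert_eq!(config.max_side_limit, Some(4000));
    }
}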