oar_ocr/pipeline/oarocr/
extensible_integration.rs

1//! Integration layer for the extensible pipeline system with OAROCR.
2//!
3//! This module provides utilities to integrate the extensible pipeline system
4//! with the existing OAROCR pipeline while maintaining backward compatibility.
5
6use image::RgbImage;
7use std::path::Path;
8use std::sync::Arc;
9use tracing::{debug, info};
10
11use crate::core::OCRError;
12use crate::pipeline::oarocr::{OAROCRConfig, OAROCRResult};
13use crate::pipeline::stages::{
14    CroppingConfig, CroppingResult, ExtensibleCroppingStage, ExtensibleOrientationStage,
15    ExtensiblePipeline, ExtensiblePipelineConfig, ExtensibleRecognitionStage,
16    ExtensibleTextDetectionStage, ExtensibleTextLineOrientationStage, OrientationConfig,
17    OrientationResult, RecognitionConfig, RecognitionResult, StageContext, StageData, StageId,
18    TextDetectionConfig, TextDetectionResult, TextLineOrientationConfig,
19};
20
21/// Integration wrapper that bridges the extensible pipeline with OAROCR.
22pub struct ExtensibleOAROCR {
23    /// The extensible pipeline
24    pipeline: ExtensiblePipeline,
25    /// Configuration for the extensible pipeline
26    config: ExtensiblePipelineConfig,
27    /// Original OAROCR configuration for fallback
28    oarocr_config: OAROCRConfig,
29}
30
31impl ExtensibleOAROCR {
32    /// Create a new extensible OAROCR instance.
33    pub fn new(
34        oarocr_config: OAROCRConfig,
35        extensible_config: ExtensiblePipelineConfig,
36    ) -> Result<Self, OCRError> {
37        let mut pipeline = ExtensiblePipeline::new();
38
39        // Register stages based on configuration
40        Self::register_standard_stages(&mut pipeline, &oarocr_config, &extensible_config)?;
41
42        Ok(Self {
43            pipeline,
44            config: extensible_config,
45            oarocr_config,
46        })
47    }
48
49    /// Register standard OCR stages with the pipeline.
50    fn register_standard_stages(
51        pipeline: &mut ExtensiblePipeline,
52        oarocr_config: &OAROCRConfig,
53        extensible_config: &ExtensiblePipelineConfig,
54    ) -> Result<(), OCRError> {
55        // 1. Orientation Stage
56        if extensible_config.is_stage_enabled("orientation") {
57            let orientation_stage = ExtensibleOrientationStage::new(None); // Would use actual classifier
58            let orientation_config = extensible_config
59                .get_stage_config::<OrientationConfig>("orientation")
60                .or_else(|| oarocr_config.orientation_stage.as_ref().cloned());
61            pipeline.register_stage(orientation_stage, orientation_config)?;
62            debug!("Registered orientation stage");
63        }
64
65        // 2. Text Detection Stage
66        if extensible_config.is_stage_enabled("text_detection") {
67            let detection_stage = ExtensibleTextDetectionStage::new(None); // Would use actual detector
68            let detection_config = extensible_config
69                .get_stage_config::<TextDetectionConfig>("text_detection")
70                .unwrap_or_default();
71            pipeline.register_stage(detection_stage, Some(detection_config))?;
72            debug!("Registered text detection stage");
73        }
74
75        // 3. Cropping Stage
76        if extensible_config.is_stage_enabled("cropping") {
77            let cropping_stage = ExtensibleCroppingStage::new();
78            let cropping_config = extensible_config
79                .get_stage_config::<CroppingConfig>("cropping")
80                .unwrap_or_default();
81            pipeline.register_stage(cropping_stage, Some(cropping_config))?;
82            debug!("Registered cropping stage");
83        }
84
85        // 4. Text Line Orientation Stage
86        if extensible_config.is_stage_enabled("text_line_orientation") {
87            let text_line_stage = ExtensibleTextLineOrientationStage::new(None); // Would use actual classifier
88            let text_line_config = extensible_config
89                .get_stage_config::<TextLineOrientationConfig>("text_line_orientation")
90                .or_else(|| oarocr_config.text_line_orientation_stage.as_ref().cloned());
91            pipeline.register_stage(text_line_stage, text_line_config)?;
92            debug!("Registered text line orientation stage");
93        }
94
95        // 5. Recognition Stage
96        if extensible_config.is_stage_enabled("recognition") {
97            let recognition_stage = ExtensibleRecognitionStage::new(None); // Would use actual recognizer
98            let recognition_config = extensible_config
99                .get_stage_config::<RecognitionConfig>("recognition")
100                .unwrap_or_else(|| {
101                    RecognitionConfig::from_legacy_config(
102                        oarocr_config.use_textline_orientation,
103                        oarocr_config.aspect_ratio_bucketing.clone(),
104                    )
105                });
106            pipeline.register_stage(recognition_stage, Some(recognition_config))?;
107            debug!("Registered recognition stage");
108        }
109
110        Ok(())
111    }
112
113    /// Process a single image using the extensible pipeline.
114    pub fn process_image(&mut self, image_path: &Path) -> Result<OAROCRResult, OCRError> {
115        info!(
116            "Processing image with extensible pipeline: {:?}",
117            image_path
118        );
119
120        // Load image
121        let input_img = crate::utils::load_image(image_path)?;
122        let input_img_arc = Arc::new(input_img.clone());
123
124        // Create stage context
125        let context = StageContext::new(input_img_arc.clone(), input_img_arc.clone(), 0);
126
127        // Create initial data
128        let initial_data = StageData::new(input_img);
129
130        // Execute pipeline - we need to modify the execute method to return the context
131        // For now, we'll use a workaround by executing the pipeline manually
132        self.execute_pipeline_and_convert(image_path, input_img_arc, context, initial_data)
133    }
134
135    /// Execute the pipeline and convert results to OAROCRResult format.
136    fn execute_pipeline_and_convert(
137        &mut self,
138        image_path: &Path,
139        input_img_arc: Arc<RgbImage>,
140        mut context: StageContext,
141        initial_data: StageData,
142    ) -> Result<OAROCRResult, OCRError> {
143        // Execute pipeline stages manually to retain access to context
144        let execution_order = self.pipeline.registry_mut().resolve_execution_order()?;
145        let mut current_data = initial_data;
146
147        info!("Executing pipeline with {} stages", execution_order.len());
148
149        for stage_id in execution_order {
150            let stage = self
151                .pipeline
152                .registry()
153                .get_stage(&stage_id)
154                .ok_or_else(|| OCRError::ConfigError {
155                    message: format!("Stage not found: {}", stage_id.as_str()),
156                })?;
157
158            let config = self.pipeline.registry().get_config(&stage_id);
159
160            // Check if stage is enabled
161            if !stage.is_enabled(&context, config) {
162                debug!("Skipping disabled stage: {}", stage.stage_name());
163                continue;
164            }
165
166            debug!("Executing stage: {}", stage.stage_name());
167
168            // Execute the stage
169            let stage_result = stage.process(&mut context, current_data, config)?;
170
171            // Store the result in context for other stages
172            context.set_stage_result(stage_id.clone(), stage_result.data);
173
174            // Update current data - stages that modify the image should update the context
175            current_data = StageData::new(context.current_image.as_ref().clone());
176
177            debug!(
178                "Stage {} completed in {:?}",
179                stage.stage_name(),
180                stage_result.metrics.processing_time
181            );
182        }
183
184        // Convert extensible pipeline results to OAROCRResult format
185        self.convert_pipeline_results_to_oarocr(image_path, input_img_arc, &context)
186    }
187
188    /// Convert extensible pipeline results to OAROCRResult format.
189    fn convert_pipeline_results_to_oarocr(
190        &self,
191        image_path: &Path,
192        input_img_arc: Arc<RgbImage>,
193        context: &StageContext,
194    ) -> Result<OAROCRResult, OCRError> {
195        // Extract results from each stage
196        let orientation_result = self.extract_orientation_result(context);
197        let text_detection_result = self.extract_text_detection_result(context);
198        let cropping_result = self.extract_cropping_result(context);
199        let recognition_result = self.extract_recognition_result(context);
200
201        // Build text regions by combining results from all stages
202        let text_regions = self.build_text_regions(
203            &text_detection_result,
204            &cropping_result,
205            &recognition_result,
206        )?;
207
208        // Calculate error metrics
209        let error_metrics = self.calculate_error_metrics(
210            &text_detection_result,
211            &cropping_result,
212            &recognition_result,
213        );
214
215        // Get the final processed image (may have been modified by orientation stage)
216        let rectified_img = if context.current_image.as_ptr() != context.original_image.as_ptr() {
217            Some(context.current_image.clone())
218        } else {
219            None
220        };
221
222        Ok(OAROCRResult {
223            input_path: Arc::from(image_path.to_string_lossy().as_ref()),
224            index: 0,
225            input_img: input_img_arc,
226            text_regions,
227            orientation_angle: orientation_result.and_then(|r| r.orientation_angle),
228            rectified_img,
229            error_metrics,
230        })
231    }
232
233    /// Extract orientation result from the stage context.
234    fn extract_orientation_result(&self, context: &StageContext) -> Option<OrientationResult> {
235        context
236            .get_stage_result::<OrientationResult>(&StageId("orientation".to_string()))
237            .cloned()
238    }
239
240    /// Extract text detection result from the stage context.
241    fn extract_text_detection_result(&self, context: &StageContext) -> Option<TextDetectionResult> {
242        context
243            .get_stage_result::<TextDetectionResult>(&StageId("text_detection".to_string()))
244            .cloned()
245    }
246
247    /// Extract cropping result from the stage context.
248    fn extract_cropping_result(&self, context: &StageContext) -> Option<CroppingResult> {
249        context
250            .get_stage_result::<CroppingResult>(&StageId("cropping".to_string()))
251            .cloned()
252    }
253
254    /// Extract recognition result from the stage context.
255    fn extract_recognition_result(&self, context: &StageContext) -> Option<RecognitionResult> {
256        context
257            .get_stage_result::<RecognitionResult>(&StageId("recognition".to_string()))
258            .cloned()
259    }
260
261    /// Build text regions by combining results from all stages.
262    fn build_text_regions(
263        &self,
264        text_detection_result: &Option<TextDetectionResult>,
265        _cropping_result: &Option<CroppingResult>,
266        recognition_result: &Option<RecognitionResult>,
267    ) -> Result<Vec<crate::pipeline::oarocr::TextRegion>, OCRError> {
268        use crate::pipeline::oarocr::TextRegion;
269
270        // Get text boxes from detection result
271        let empty_boxes = Vec::new();
272        let text_boxes = text_detection_result
273            .as_ref()
274            .map(|r| &r.text_boxes)
275            .unwrap_or(&empty_boxes);
276
277        if text_boxes.is_empty() {
278            return Ok(Vec::new());
279        }
280
281        // Build text regions
282        let mut text_regions = Vec::new();
283        for (i, bbox) in text_boxes.iter().enumerate() {
284            // Get recognition results if available
285            let (text, confidence) = if let Some(rec_result) = recognition_result {
286                let text = if i < rec_result.rec_texts.len() && !rec_result.rec_texts[i].is_empty()
287                {
288                    Some(rec_result.rec_texts[i].clone())
289                } else {
290                    None
291                };
292
293                let confidence =
294                    if i < rec_result.rec_scores.len() && rec_result.rec_scores[i] > 0.0 {
295                        Some(rec_result.rec_scores[i])
296                    } else {
297                        None
298                    };
299
300                (text, confidence)
301            } else {
302                (None, None)
303            };
304
305            // For now, we don't extract text line orientation from the extensible pipeline
306            // This could be added later if text line orientation stage is implemented
307            let orientation_angle = None;
308
309            let text_region =
310                TextRegion::with_all(bbox.clone(), text, confidence, orientation_angle);
311
312            text_regions.push(text_region);
313        }
314
315        Ok(text_regions)
316    }
317
318    /// Calculate error metrics from stage results.
319    fn calculate_error_metrics(
320        &self,
321        text_detection_result: &Option<TextDetectionResult>,
322        cropping_result: &Option<CroppingResult>,
323        recognition_result: &Option<RecognitionResult>,
324    ) -> crate::pipeline::oarocr::ErrorMetrics {
325        use crate::pipeline::oarocr::ErrorMetrics;
326
327        let total_text_boxes = text_detection_result
328            .as_ref()
329            .map(|r| r.text_boxes.len())
330            .unwrap_or(0);
331
332        let failed_crops = cropping_result
333            .as_ref()
334            .map(|r| r.failed_crops)
335            .unwrap_or(0);
336
337        let failed_recognitions = recognition_result
338            .as_ref()
339            .map(|r| r.failed_recognitions)
340            .unwrap_or(0);
341
342        // Text line orientation failures are not tracked in the current extensible pipeline
343        let failed_orientations = 0;
344
345        ErrorMetrics {
346            failed_crops,
347            failed_recognitions,
348            failed_orientations,
349            total_text_boxes,
350        }
351    }
352
353    /// Get the extensible pipeline configuration.
354    pub fn extensible_config(&self) -> &ExtensiblePipelineConfig {
355        &self.config
356    }
357
358    /// Get the original OAROCR configuration.
359    pub fn oarocr_config(&self) -> &OAROCRConfig {
360        &self.oarocr_config
361    }
362
363    /// Add a custom stage to the pipeline.
364    pub fn add_custom_stage<S, C>(&mut self, stage: S, config: Option<C>) -> Result<(), OCRError>
365    where
366        S: crate::pipeline::stages::PipelineStage<Config = C> + 'static,
367        C: Send
368            + Sync
369            + std::fmt::Debug
370            + Clone
371            + crate::core::config::ConfigValidator
372            + Default
373            + 'static,
374    {
375        self.pipeline.register_stage(stage, config)
376    }
377}
378
379/// Builder for creating ExtensibleOAROCR instances.
380pub struct ExtensibleOAROCRBuilder {
381    oarocr_config: OAROCRConfig,
382    extensible_config: Option<ExtensiblePipelineConfig>,
383}
384
385impl ExtensibleOAROCRBuilder {
386    /// Create a new builder with the given OAROCR configuration.
387    pub fn new(oarocr_config: OAROCRConfig) -> Self {
388        Self {
389            oarocr_config,
390            extensible_config: None,
391        }
392    }
393
394    /// Set the extensible pipeline configuration.
395    pub fn extensible_config(mut self, config: ExtensiblePipelineConfig) -> Self {
396        self.extensible_config = Some(config);
397        self
398    }
399
400    /// Use the default OCR pipeline configuration.
401    pub fn default_ocr_pipeline(mut self) -> Self {
402        self.extensible_config = Some(ExtensiblePipelineConfig::default());
403        self
404    }
405
406    /// Use the minimal pipeline configuration.
407    pub fn minimal_pipeline(mut self) -> Self {
408        self.extensible_config = Some(ExtensiblePipelineConfig::default());
409        self
410    }
411
412    /// Use the layout-aware pipeline configuration.
413    pub fn layout_aware_pipeline(mut self) -> Self {
414        self.extensible_config = Some(ExtensiblePipelineConfig::default());
415        self
416    }
417
418    /// Build the ExtensibleOAROCR instance.
419    pub fn build(self) -> Result<ExtensibleOAROCR, OCRError> {
420        let extensible_config = self.extensible_config.unwrap_or_default();
421
422        ExtensibleOAROCR::new(self.oarocr_config, extensible_config)
423    }
424}
425
/// Home for helpers that convert between pipeline result formats.
///
/// The module is currently an empty placeholder.
pub mod conversion {}
428
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pipeline::stages::ExtensiblePipelineConfig;
    use std::path::PathBuf;

    /// Constructing directly from two default configurations succeeds.
    #[test]
    fn test_extensible_oarocr_creation() {
        let created =
            ExtensibleOAROCR::new(OAROCRConfig::default(), ExtensiblePipelineConfig::default());

        assert!(created.is_ok());
    }

    /// The builder with the default OCR preset produces an instance.
    #[test]
    fn test_builder_pattern() {
        let built = ExtensibleOAROCRBuilder::new(OAROCRConfig::default())
            .default_ocr_pipeline()
            .build();

        assert!(built.is_ok());
    }

    /// Converting a context that holds no stage results yields an empty result.
    #[test]
    fn test_conversion_with_empty_results() {
        let extensible_oarocr =
            ExtensibleOAROCR::new(OAROCRConfig::default(), ExtensiblePipelineConfig::default())
                .unwrap();

        // A blank image shared by the context and the conversion call; no stage
        // has run, so the context carries no results.
        let blank = Arc::new(RgbImage::new(100, 100));
        let context = StageContext::new(Arc::clone(&blank), Arc::clone(&blank), 0);
        let image_path = PathBuf::from("test.jpg");

        let converted =
            extensible_oarocr.convert_pipeline_results_to_oarocr(&image_path, blank, &context);

        assert!(converted.is_ok());
        let oarocr_result = converted.unwrap();
        assert_eq!(oarocr_result.text_regions.len(), 0);
        assert_eq!(oarocr_result.orientation_angle, None);
        assert_eq!(oarocr_result.error_metrics.total_text_boxes, 0);
    }
}