1use image::RgbImage;
7use std::path::Path;
8use std::sync::Arc;
9use tracing::{debug, info};
10
11use crate::core::OCRError;
12use crate::pipeline::oarocr::{OAROCRConfig, OAROCRResult};
13use crate::pipeline::stages::{
14 CroppingConfig, CroppingResult, ExtensibleCroppingStage, ExtensibleOrientationStage,
15 ExtensiblePipeline, ExtensiblePipelineConfig, ExtensibleRecognitionStage,
16 ExtensibleTextDetectionStage, ExtensibleTextLineOrientationStage, OrientationConfig,
17 OrientationResult, RecognitionConfig, RecognitionResult, StageContext, StageData, StageId,
18 TextDetectionConfig, TextDetectionResult, TextLineOrientationConfig,
19};
20
21pub struct ExtensibleOAROCR {
23 pipeline: ExtensiblePipeline,
25 config: ExtensiblePipelineConfig,
27 oarocr_config: OAROCRConfig,
29}
30
31impl ExtensibleOAROCR {
32 pub fn new(
34 oarocr_config: OAROCRConfig,
35 extensible_config: ExtensiblePipelineConfig,
36 ) -> Result<Self, OCRError> {
37 let mut pipeline = ExtensiblePipeline::new();
38
39 Self::register_standard_stages(&mut pipeline, &oarocr_config, &extensible_config)?;
41
42 Ok(Self {
43 pipeline,
44 config: extensible_config,
45 oarocr_config,
46 })
47 }
48
49 fn register_standard_stages(
51 pipeline: &mut ExtensiblePipeline,
52 oarocr_config: &OAROCRConfig,
53 extensible_config: &ExtensiblePipelineConfig,
54 ) -> Result<(), OCRError> {
55 if extensible_config.is_stage_enabled("orientation") {
57 let orientation_stage = ExtensibleOrientationStage::new(None); let orientation_config = extensible_config
59 .get_stage_config::<OrientationConfig>("orientation")
60 .or_else(|| oarocr_config.orientation_stage.as_ref().cloned());
61 pipeline.register_stage(orientation_stage, orientation_config)?;
62 debug!("Registered orientation stage");
63 }
64
65 if extensible_config.is_stage_enabled("text_detection") {
67 let detection_stage = ExtensibleTextDetectionStage::new(None); let detection_config = extensible_config
69 .get_stage_config::<TextDetectionConfig>("text_detection")
70 .unwrap_or_default();
71 pipeline.register_stage(detection_stage, Some(detection_config))?;
72 debug!("Registered text detection stage");
73 }
74
75 if extensible_config.is_stage_enabled("cropping") {
77 let cropping_stage = ExtensibleCroppingStage::new();
78 let cropping_config = extensible_config
79 .get_stage_config::<CroppingConfig>("cropping")
80 .unwrap_or_default();
81 pipeline.register_stage(cropping_stage, Some(cropping_config))?;
82 debug!("Registered cropping stage");
83 }
84
85 if extensible_config.is_stage_enabled("text_line_orientation") {
87 let text_line_stage = ExtensibleTextLineOrientationStage::new(None); let text_line_config = extensible_config
89 .get_stage_config::<TextLineOrientationConfig>("text_line_orientation")
90 .or_else(|| oarocr_config.text_line_orientation_stage.as_ref().cloned());
91 pipeline.register_stage(text_line_stage, text_line_config)?;
92 debug!("Registered text line orientation stage");
93 }
94
95 if extensible_config.is_stage_enabled("recognition") {
97 let recognition_stage = ExtensibleRecognitionStage::new(None); let recognition_config = extensible_config
99 .get_stage_config::<RecognitionConfig>("recognition")
100 .unwrap_or_else(|| {
101 RecognitionConfig::from_legacy_config(
102 oarocr_config.use_textline_orientation,
103 oarocr_config.aspect_ratio_bucketing.clone(),
104 )
105 });
106 pipeline.register_stage(recognition_stage, Some(recognition_config))?;
107 debug!("Registered recognition stage");
108 }
109
110 Ok(())
111 }
112
113 pub fn process_image(&mut self, image_path: &Path) -> Result<OAROCRResult, OCRError> {
115 info!(
116 "Processing image with extensible pipeline: {:?}",
117 image_path
118 );
119
120 let input_img = crate::utils::load_image(image_path)?;
122 let input_img_arc = Arc::new(input_img.clone());
123
124 let context = StageContext::new(input_img_arc.clone(), input_img_arc.clone(), 0);
126
127 let initial_data = StageData::new(input_img);
129
130 self.execute_pipeline_and_convert(image_path, input_img_arc, context, initial_data)
133 }
134
135 fn execute_pipeline_and_convert(
137 &mut self,
138 image_path: &Path,
139 input_img_arc: Arc<RgbImage>,
140 mut context: StageContext,
141 initial_data: StageData,
142 ) -> Result<OAROCRResult, OCRError> {
143 let execution_order = self.pipeline.registry_mut().resolve_execution_order()?;
145 let mut current_data = initial_data;
146
147 info!("Executing pipeline with {} stages", execution_order.len());
148
149 for stage_id in execution_order {
150 let stage = self
151 .pipeline
152 .registry()
153 .get_stage(&stage_id)
154 .ok_or_else(|| OCRError::ConfigError {
155 message: format!("Stage not found: {}", stage_id.as_str()),
156 })?;
157
158 let config = self.pipeline.registry().get_config(&stage_id);
159
160 if !stage.is_enabled(&context, config) {
162 debug!("Skipping disabled stage: {}", stage.stage_name());
163 continue;
164 }
165
166 debug!("Executing stage: {}", stage.stage_name());
167
168 let stage_result = stage.process(&mut context, current_data, config)?;
170
171 context.set_stage_result(stage_id.clone(), stage_result.data);
173
174 current_data = StageData::new(context.current_image.as_ref().clone());
176
177 debug!(
178 "Stage {} completed in {:?}",
179 stage.stage_name(),
180 stage_result.metrics.processing_time
181 );
182 }
183
184 self.convert_pipeline_results_to_oarocr(image_path, input_img_arc, &context)
186 }
187
188 fn convert_pipeline_results_to_oarocr(
190 &self,
191 image_path: &Path,
192 input_img_arc: Arc<RgbImage>,
193 context: &StageContext,
194 ) -> Result<OAROCRResult, OCRError> {
195 let orientation_result = self.extract_orientation_result(context);
197 let text_detection_result = self.extract_text_detection_result(context);
198 let cropping_result = self.extract_cropping_result(context);
199 let recognition_result = self.extract_recognition_result(context);
200
201 let text_regions = self.build_text_regions(
203 &text_detection_result,
204 &cropping_result,
205 &recognition_result,
206 )?;
207
208 let error_metrics = self.calculate_error_metrics(
210 &text_detection_result,
211 &cropping_result,
212 &recognition_result,
213 );
214
215 let rectified_img = if context.current_image.as_ptr() != context.original_image.as_ptr() {
217 Some(context.current_image.clone())
218 } else {
219 None
220 };
221
222 Ok(OAROCRResult {
223 input_path: Arc::from(image_path.to_string_lossy().as_ref()),
224 index: 0,
225 input_img: input_img_arc,
226 text_regions,
227 orientation_angle: orientation_result.and_then(|r| r.orientation_angle),
228 rectified_img,
229 error_metrics,
230 })
231 }
232
233 fn extract_orientation_result(&self, context: &StageContext) -> Option<OrientationResult> {
235 context
236 .get_stage_result::<OrientationResult>(&StageId("orientation".to_string()))
237 .cloned()
238 }
239
240 fn extract_text_detection_result(&self, context: &StageContext) -> Option<TextDetectionResult> {
242 context
243 .get_stage_result::<TextDetectionResult>(&StageId("text_detection".to_string()))
244 .cloned()
245 }
246
247 fn extract_cropping_result(&self, context: &StageContext) -> Option<CroppingResult> {
249 context
250 .get_stage_result::<CroppingResult>(&StageId("cropping".to_string()))
251 .cloned()
252 }
253
254 fn extract_recognition_result(&self, context: &StageContext) -> Option<RecognitionResult> {
256 context
257 .get_stage_result::<RecognitionResult>(&StageId("recognition".to_string()))
258 .cloned()
259 }
260
261 fn build_text_regions(
263 &self,
264 text_detection_result: &Option<TextDetectionResult>,
265 _cropping_result: &Option<CroppingResult>,
266 recognition_result: &Option<RecognitionResult>,
267 ) -> Result<Vec<crate::pipeline::oarocr::TextRegion>, OCRError> {
268 use crate::pipeline::oarocr::TextRegion;
269
270 let empty_boxes = Vec::new();
272 let text_boxes = text_detection_result
273 .as_ref()
274 .map(|r| &r.text_boxes)
275 .unwrap_or(&empty_boxes);
276
277 if text_boxes.is_empty() {
278 return Ok(Vec::new());
279 }
280
281 let mut text_regions = Vec::new();
283 for (i, bbox) in text_boxes.iter().enumerate() {
284 let (text, confidence) = if let Some(rec_result) = recognition_result {
286 let text = if i < rec_result.rec_texts.len() && !rec_result.rec_texts[i].is_empty()
287 {
288 Some(rec_result.rec_texts[i].clone())
289 } else {
290 None
291 };
292
293 let confidence =
294 if i < rec_result.rec_scores.len() && rec_result.rec_scores[i] > 0.0 {
295 Some(rec_result.rec_scores[i])
296 } else {
297 None
298 };
299
300 (text, confidence)
301 } else {
302 (None, None)
303 };
304
305 let orientation_angle = None;
308
309 let text_region =
310 TextRegion::with_all(bbox.clone(), text, confidence, orientation_angle);
311
312 text_regions.push(text_region);
313 }
314
315 Ok(text_regions)
316 }
317
318 fn calculate_error_metrics(
320 &self,
321 text_detection_result: &Option<TextDetectionResult>,
322 cropping_result: &Option<CroppingResult>,
323 recognition_result: &Option<RecognitionResult>,
324 ) -> crate::pipeline::oarocr::ErrorMetrics {
325 use crate::pipeline::oarocr::ErrorMetrics;
326
327 let total_text_boxes = text_detection_result
328 .as_ref()
329 .map(|r| r.text_boxes.len())
330 .unwrap_or(0);
331
332 let failed_crops = cropping_result
333 .as_ref()
334 .map(|r| r.failed_crops)
335 .unwrap_or(0);
336
337 let failed_recognitions = recognition_result
338 .as_ref()
339 .map(|r| r.failed_recognitions)
340 .unwrap_or(0);
341
342 let failed_orientations = 0;
344
345 ErrorMetrics {
346 failed_crops,
347 failed_recognitions,
348 failed_orientations,
349 total_text_boxes,
350 }
351 }
352
353 pub fn extensible_config(&self) -> &ExtensiblePipelineConfig {
355 &self.config
356 }
357
358 pub fn oarocr_config(&self) -> &OAROCRConfig {
360 &self.oarocr_config
361 }
362
363 pub fn add_custom_stage<S, C>(&mut self, stage: S, config: Option<C>) -> Result<(), OCRError>
365 where
366 S: crate::pipeline::stages::PipelineStage<Config = C> + 'static,
367 C: Send
368 + Sync
369 + std::fmt::Debug
370 + Clone
371 + crate::core::config::ConfigValidator
372 + Default
373 + 'static,
374 {
375 self.pipeline.register_stage(stage, config)
376 }
377}
378
379pub struct ExtensibleOAROCRBuilder {
381 oarocr_config: OAROCRConfig,
382 extensible_config: Option<ExtensiblePipelineConfig>,
383}
384
385impl ExtensibleOAROCRBuilder {
386 pub fn new(oarocr_config: OAROCRConfig) -> Self {
388 Self {
389 oarocr_config,
390 extensible_config: None,
391 }
392 }
393
394 pub fn extensible_config(mut self, config: ExtensiblePipelineConfig) -> Self {
396 self.extensible_config = Some(config);
397 self
398 }
399
400 pub fn default_ocr_pipeline(mut self) -> Self {
402 self.extensible_config = Some(ExtensiblePipelineConfig::default());
403 self
404 }
405
406 pub fn minimal_pipeline(mut self) -> Self {
408 self.extensible_config = Some(ExtensiblePipelineConfig::default());
409 self
410 }
411
412 pub fn layout_aware_pipeline(mut self) -> Self {
414 self.extensible_config = Some(ExtensiblePipelineConfig::default());
415 self
416 }
417
418 pub fn build(self) -> Result<ExtensibleOAROCR, OCRError> {
420 let extensible_config = self.extensible_config.unwrap_or_default();
421
422 ExtensibleOAROCR::new(self.oarocr_config, extensible_config)
423 }
424}
425
/// Public module that currently declares no items.
///
/// NOTE(review): presumably a placeholder for conversion helpers; confirm
/// before relying on it.
pub mod conversion {}
428
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pipeline::stages::ExtensiblePipelineConfig;
    use std::path::PathBuf;

    /// Direct construction with default configurations succeeds.
    #[test]
    fn test_extensible_oarocr_creation() {
        let created =
            ExtensibleOAROCR::new(OAROCRConfig::default(), ExtensiblePipelineConfig::default());

        assert!(created.is_ok());
    }

    /// The builder with the default OCR preset produces an instance.
    #[test]
    fn test_builder_pattern() {
        let builder = ExtensibleOAROCRBuilder::new(OAROCRConfig::default());
        let built = builder.default_ocr_pipeline().build();

        assert!(built.is_ok());
    }

    /// Converting a context with no stage results yields an empty result.
    #[test]
    fn test_conversion_with_empty_results() {
        let extensible_oarocr =
            ExtensibleOAROCR::new(OAROCRConfig::default(), ExtensiblePipelineConfig::default())
                .unwrap();

        let shared_image = Arc::new(RgbImage::new(100, 100));
        let context = StageContext::new(shared_image.clone(), shared_image.clone(), 0);
        let image_path = PathBuf::from("test.jpg");

        let conversion = extensible_oarocr.convert_pipeline_results_to_oarocr(
            &image_path,
            shared_image,
            &context,
        );

        assert!(conversion.is_ok());
        let oarocr_result = conversion.unwrap();
        assert_eq!(oarocr_result.text_regions.len(), 0);
        assert_eq!(oarocr_result.orientation_angle, None);
        assert_eq!(oarocr_result.error_metrics.total_text_boxes, 0);
    }
}