1use image::RgbImage;
4use rayon::prelude::*;
5use serde::{Deserialize, Serialize};
6
7use tracing::{debug, warn};
8
9use super::extensible::{PipelineStage, StageContext, StageData, StageDependency, StageId};
10use super::processor_helper::{BatchConfig, BatchProcessor};
11use super::types::{StageMetrics, StageResult};
12use crate::core::OCRError;
13use crate::core::config::ConfigValidator;
14use crate::pipeline::oarocr::ImageProcessor;
15use crate::processors::BoundingBox;
16
17#[derive(Debug, Clone)]
19pub struct CroppingResult {
20 pub cropped_images: Vec<Option<RgbImage>>,
22 pub failed_crops: usize,
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize, Default)]
28pub struct CroppingConfig {
29 }
31
32impl ConfigValidator for CroppingConfig {
33 fn validate(&self) -> Result<(), crate::core::config::ConfigError> {
34 Ok(())
36 }
37
38 fn get_defaults() -> Self {
39 Self::default()
40 }
41}
42
43impl CroppingConfig {
44 pub fn new() -> Self {
46 Self::default()
47 }
48
49 pub fn effective_threshold(&self, policy_threshold: Option<usize>) -> usize {
51 policy_threshold.unwrap_or(10) }
53}
54
55pub struct CroppingStageProcessor;
62
63impl CroppingStageProcessor {
64 pub fn process_single(
76 image: &RgbImage,
77 text_boxes: &[BoundingBox],
78 config: Option<&CroppingConfig>,
79 ) -> Result<StageResult<CroppingResult>, OCRError> {
80 Self::process_single_with_policy(image, text_boxes, config, None)
81 }
82
83 pub fn process_single_with_policy(
96 image: &RgbImage,
97 text_boxes: &[BoundingBox],
98 config: Option<&CroppingConfig>,
99 policy_threshold: Option<usize>,
100 ) -> Result<StageResult<CroppingResult>, OCRError> {
101 use std::time::Instant;
102 let start_time = Instant::now();
103 let default_config = CroppingConfig::default();
104 let config = config.unwrap_or(&default_config);
105
106 debug!("Processing {} text boxes for cropping", text_boxes.len());
107
108 if text_boxes.is_empty() {
109 let metrics = StageMetrics::new(0, 0)
110 .with_processing_time(start_time.elapsed())
111 .with_info("stage", "cropping")
112 .with_info("text_boxes", "0");
113
114 return Ok(StageResult::new(
115 CroppingResult {
116 cropped_images: Vec::new(),
117 failed_crops: 0,
118 },
119 metrics,
120 ));
121 }
122
123 let effective_threshold = config.effective_threshold(policy_threshold);
125 let use_parallel = text_boxes.len() > effective_threshold;
126 let cropping_results: Vec<(usize, Result<RgbImage, OCRError>)> = if use_parallel {
127 debug!(
128 "Using parallel cropping for {} text boxes",
129 text_boxes.len()
130 );
131 text_boxes
132 .par_iter()
133 .enumerate()
134 .map(|(idx, bbox)| {
135 let crop_result = Self::crop_bounding_box(image, bbox);
136 (idx, crop_result)
137 })
138 .collect()
139 } else {
140 debug!(
141 "Using sequential cropping for {} text boxes",
142 text_boxes.len()
143 );
144 text_boxes
145 .iter()
146 .enumerate()
147 .map(|(idx, bbox)| {
148 let crop_result = Self::crop_bounding_box(image, bbox);
149 (idx, crop_result)
150 })
151 .collect()
152 };
153
154 let mut failed_crops = 0;
156 let cropped_images: Vec<Option<RgbImage>> = cropping_results
157 .into_iter()
158 .map(|(idx, crop_result)| match crop_result {
159 Ok(img) => Some(img),
160 Err(e) => {
161 failed_crops += 1;
162 warn!(
163 "Failed to crop text box {} with {} points: {}",
164 idx,
165 text_boxes[idx].points.len(),
166 e
167 );
168 None
169 }
170 })
171 .collect();
172
173 let success_count = text_boxes.len() - failed_crops;
174 let result = CroppingResult {
175 cropped_images,
176 failed_crops,
177 };
178
179 let metrics = StageMetrics::new(success_count, failed_crops)
180 .with_processing_time(start_time.elapsed())
181 .with_info("stage", "cropping")
182 .with_info("text_boxes", text_boxes.len().to_string())
183 .with_info("parallel_processing", use_parallel.to_string());
184
185 Ok(StageResult::new(result, metrics))
186 }
187
188 fn crop_bounding_box(image: &RgbImage, bbox: &BoundingBox) -> Result<RgbImage, OCRError> {
201 if bbox.points.len() == 4 {
202 ImageProcessor::crop_rotated_bounding_box(image, bbox)
204 } else {
205 ImageProcessor::crop_bounding_box(image, bbox)
207 }
208 }
209
210 pub fn process_batch(
221 images_and_boxes: Vec<(&RgbImage, &[BoundingBox])>,
222 config: Option<&CroppingConfig>,
223 ) -> Result<StageResult<Vec<CroppingResult>>, OCRError> {
224 Self::process_batch_with_policy(images_and_boxes, config, None)
225 }
226
227 pub fn process_batch_with_policy(
239 images_and_boxes: Vec<(&RgbImage, &[BoundingBox])>,
240 config: Option<&CroppingConfig>,
241 _policy_threshold: Option<usize>,
242 ) -> Result<StageResult<Vec<CroppingResult>>, OCRError> {
243 let batch_config = BatchConfig::new("cropping_batch").with_fallback_results(true);
244
245 let processor = BatchProcessor::new(&batch_config);
246
247 let owned_data: Vec<(RgbImage, Vec<BoundingBox>)> = images_and_boxes
249 .into_iter()
250 .map(|(image, boxes)| (image.clone(), boxes.to_vec()))
251 .collect();
252
253 let result = processor.process_items(
254 owned_data,
255 |(image, text_boxes)| {
256 Self::process_single(&image, &text_boxes, config).map(|stage_result| {
257 (
259 stage_result.data,
260 stage_result.metrics.success_count,
261 stage_result.metrics.failure_count,
262 )
263 })
264 },
265 |e, index| {
266 warn!("Cropping processing failed for image {}: {}", index, e);
267 Some((
270 CroppingResult {
271 cropped_images: Vec::new(),
272 failed_crops: 0,
273 },
274 0,
275 1,
276 ))
277 },
278 )?;
279
280 let mut cropping_results = Vec::new();
282 let mut total_success = 0;
283 let mut total_failures = 0;
284
285 for (cropping_result, success_count, failure_count) in result.data {
286 cropping_results.push(cropping_result);
287 total_success += success_count;
288 total_failures += failure_count;
289 }
290
291 let mut updated_metrics = result.metrics;
293 updated_metrics.success_count = total_success;
294 updated_metrics.failure_count = total_failures;
295 updated_metrics
296 .additional_info
297 .insert("batch_size".to_string(), cropping_results.len().to_string());
298
299 Ok(StageResult::new(cropping_results, updated_metrics))
300 }
301}
302
/// Pipeline-stage handle for text box cropping.
///
/// The type is a field-less marker; `new()` and `default()` produce the same value.
#[derive(Debug, Default)]
pub struct ExtensibleCroppingStage;

impl ExtensibleCroppingStage {
    /// Creates a new cropping stage.
    pub fn new() -> Self {
        ExtensibleCroppingStage
    }
}
319
320impl PipelineStage for ExtensibleCroppingStage {
321 type Config = CroppingConfig;
322 type Result = CroppingResult;
323
324 fn stage_id(&self) -> StageId {
325 StageId::new("cropping")
326 }
327
328 fn stage_name(&self) -> &str {
329 "Text Box Cropping"
330 }
331
332 fn dependencies(&self) -> Vec<StageDependency> {
333 vec![StageDependency::Requires(StageId::new("text_detection"))]
335 }
336
337 fn is_enabled(&self, context: &StageContext, _config: Option<&Self::Config>) -> bool {
338 context
340 .get_stage_result::<Vec<BoundingBox>>(&StageId::new("text_detection"))
341 .is_some()
342 }
343
344 fn process(
345 &self,
346 context: &mut StageContext,
347 data: StageData,
348 config: Option<&Self::Config>,
349 ) -> Result<StageResult<Self::Result>, OCRError> {
350 let text_boxes = context
352 .get_stage_result::<Vec<BoundingBox>>(&StageId::new("text_detection"))
353 .ok_or_else(|| {
354 OCRError::processing_error(
355 crate::core::ProcessingStage::Generic,
356 "Text boxes not found in context",
357 crate::core::errors::SimpleError::new("Missing text detection results"),
358 )
359 })?;
360
361 let cropping_config = config.cloned().unwrap_or_default();
362
363 let stage_result = CroppingStageProcessor::process_single(
364 &data.image,
365 text_boxes,
366 Some(&cropping_config),
367 )?;
368
369 Ok(stage_result)
370 }
371
372 fn validate_config(&self, config: &Self::Config) -> Result<(), OCRError> {
373 config.validate().map_err(|e| OCRError::ConfigError {
374 message: format!("CroppingConfig validation failed: {}", e),
375 })
376 }
377
378 fn default_config(&self) -> Self::Config {
379 CroppingConfig::get_defaults()
380 }
381}
382
#[cfg(test)]
mod tests {
    use super::*;

    /// A supplied policy threshold is returned as-is; without one the
    /// fallback of 10 is used.
    #[test]
    fn test_cropping_config_effective_threshold() {
        let cfg = CroppingConfig::new();
        let cases: [(Option<usize>, usize); 2] = [(Some(5), 5), (None, 10)];

        for &(policy, expected) in cases.iter() {
            assert_eq!(cfg.effective_threshold(policy), expected);
        }
    }

    /// A JSON round trip yields a configuration with the same effective threshold.
    #[test]
    fn test_cropping_config_serialization() {
        let original = CroppingConfig::new();

        let json = serde_json::to_string(&original).unwrap();
        let restored: CroppingConfig = serde_json::from_str(&json).unwrap();

        let before = original.effective_threshold(None);
        let after = restored.effective_threshold(None);
        assert_eq!(before, after);
    }
}