oar_ocr/pipeline/oarocr/
result.rs

1//! Result types for the OAROCR pipeline.
2
3use crate::processors::BoundingBox;
4use image::RgbImage;
5use std::fmt;
6use std::sync::Arc;
7
8/// A text region containing detection and recognition results.
9///
10/// This struct groups together all the information related to a single detected text region,
11/// including the bounding box, recognized text, confidence score, and orientation angle.
12/// This design eliminates the need for parallel vectors and provides better ergonomics
13/// for iterating over text regions.
#[derive(Debug, Clone)]
pub struct TextRegion {
    /// The bounding box (polygon) of the detected text region.
    pub bounding_box: BoundingBox,
    /// The recognized text, if recognition was successful.
    /// `None` indicates that recognition failed or was filtered out due to low confidence.
    /// Stored as `Arc<str>` so results can be shared cheaply across threads/clones.
    pub text: Option<Arc<str>>,
    /// The confidence score for the recognized text.
    /// `None` indicates that recognition failed or was filtered out due to low confidence.
    pub confidence: Option<f32>,
    /// The text line orientation angle in degrees, if orientation classification was performed.
    /// `None` indicates that orientation classification was not performed or failed.
    pub orientation_angle: Option<f32>,
}
28
29impl TextRegion {
30    /// Creates a new TextRegion with the given bounding box.
31    ///
32    /// The text, confidence, and orientation_angle are initially set to None.
33    pub fn new(bounding_box: BoundingBox) -> Self {
34        Self {
35            bounding_box,
36            text: None,
37            confidence: None,
38            orientation_angle: None,
39        }
40    }
41
42    /// Creates a new TextRegion with detection and recognition results.
43    pub fn with_recognition(
44        bounding_box: BoundingBox,
45        text: Option<Arc<str>>,
46        confidence: Option<f32>,
47    ) -> Self {
48        Self {
49            bounding_box,
50            text,
51            confidence,
52            orientation_angle: None,
53        }
54    }
55
56    /// Creates a new TextRegion with all fields specified.
57    pub fn with_all(
58        bounding_box: BoundingBox,
59        text: Option<Arc<str>>,
60        confidence: Option<f32>,
61        orientation_angle: Option<f32>,
62    ) -> Self {
63        Self {
64            bounding_box,
65            text,
66            confidence,
67            orientation_angle,
68        }
69    }
70
71    /// Returns true if this text region has recognized text.
72    pub fn has_text(&self) -> bool {
73        self.text.is_some()
74    }
75
76    /// Returns true if this text region has a confidence score.
77    pub fn has_confidence(&self) -> bool {
78        self.confidence.is_some()
79    }
80
81    /// Returns true if this text region has an orientation angle.
82    pub fn has_orientation(&self) -> bool {
83        self.orientation_angle.is_some()
84    }
85
86    /// Returns the text and confidence as a tuple if both are available.
87    pub fn text_with_confidence(&self) -> Option<(&str, f32)> {
88        match (&self.text, self.confidence) {
89            (Some(text), Some(confidence)) => Some((text, confidence)),
90            _ => None,
91        }
92    }
93}
94
95/// Result of the OAROCR pipeline execution.
96///
97/// This struct contains all the results from processing an image through
98/// the OCR pipeline, including detected text boxes, recognized text, and
99/// any intermediate processing results.
#[derive(Debug, Clone)]
pub struct OAROCRResult {
    /// Path to the input image file.
    pub input_path: Arc<str>,
    /// Index of the image in a batch (0 for single image processing).
    pub index: usize,
    /// The input image, shared via `Arc` to avoid copying pixel data.
    pub input_img: Arc<RgbImage>,
    /// Structured text regions containing detection and recognition results.
    /// This is the preferred way to access OCR results as it groups related data together.
    pub text_regions: Vec<TextRegion>,
    /// Document orientation angle in degrees (if orientation classification was used).
    pub orientation_angle: Option<f32>,
    /// Rectified image (if document unwarping was used).
    pub rectified_img: Option<Arc<RgbImage>>,
    /// Error metrics for data quality monitoring.
    pub error_metrics: ErrorMetrics,
}
118
119impl OAROCRResult {
120    /// Creates text regions from parallel vectors.
121    ///
122    /// This is a helper method used internally during result construction.
123    pub(crate) fn create_text_regions_from_vectors(
124        text_boxes: &[BoundingBox],
125        rec_texts: &[Option<Arc<str>>],
126        rec_scores: &[Option<f32>],
127        text_line_orientation_angles: &[Option<f32>],
128    ) -> Vec<TextRegion> {
129        text_boxes
130            .iter()
131            .enumerate()
132            .map(|(i, bbox)| {
133                let text = rec_texts.get(i).and_then(|t| t.clone());
134                let confidence = rec_scores.get(i).and_then(|s| *s);
135                let orientation_angle = text_line_orientation_angles.get(i).and_then(|a| *a);
136
137                TextRegion::with_all(bbox.clone(), text, confidence, orientation_angle)
138            })
139            .collect()
140    }
141
142    /// Returns an iterator over text regions that have recognized text.
143    pub fn recognized_text_regions(&self) -> impl Iterator<Item = &TextRegion> {
144        self.text_regions.iter().filter(|region| region.has_text())
145    }
146
147    /// Returns an iterator over text regions with both text and confidence scores.
148    pub fn confident_text_regions(&self) -> impl Iterator<Item = &TextRegion> {
149        self.text_regions
150            .iter()
151            .filter(|region| region.has_confidence())
152    }
153
154    /// Returns all recognized text as a vector of strings.
155    pub fn all_text(&self) -> Vec<&str> {
156        self.text_regions
157            .iter()
158            .filter_map(|region| region.text.as_ref().map(|s| s.as_ref()))
159            .collect()
160    }
161
162    /// Returns all recognized text concatenated with the specified separator.
163    pub fn concatenated_text(&self, separator: &str) -> String {
164        self.all_text().join(separator)
165    }
166
167    /// Returns the number of text regions that have recognized text.
168    pub fn recognized_text_count(&self) -> usize {
169        self.text_regions
170            .iter()
171            .filter(|region| region.has_text())
172            .count()
173    }
174
175    /// Returns the average confidence score of all recognized text regions.
176    pub fn average_confidence(&self) -> Option<f32> {
177        let confident_regions: Vec<_> = self.confident_text_regions().collect();
178        if confident_regions.is_empty() {
179            None
180        } else {
181            let sum: f32 = confident_regions
182                .iter()
183                .filter_map(|region| region.confidence)
184                .sum();
185            Some(sum / confident_regions.len() as f32)
186        }
187    }
188}
189
190/// Error metrics for monitoring data quality and model performance issues.
/// Error metrics for monitoring data quality and model performance issues.
#[derive(Debug, Clone, Default)]
pub struct ErrorMetrics {
    /// Number of text boxes that failed to crop.
    pub failed_crops: usize,
    /// Number of text recognition failures (among successfully cropped boxes).
    pub failed_recognitions: usize,
    /// Number of text line orientation classification failures (among successfully cropped boxes).
    pub failed_orientations: usize,
    /// Total number of text boxes detected.
    pub total_text_boxes: usize,
}
202
203impl ErrorMetrics {
204    /// Creates a new ErrorMetrics instance.
205    pub fn new() -> Self {
206        Self::default()
207    }
208
209    /// Returns the success rate for cropping operations (0.0 to 1.0).
210    pub fn crop_success_rate(&self) -> f32 {
211        if self.total_text_boxes == 0 {
212            1.0
213        } else {
214            self.total_text_boxes.saturating_sub(self.failed_crops) as f32
215                / self.total_text_boxes as f32
216        }
217    }
218
219    /// Returns the success rate for recognition operations (0.0 to 1.0).
220    pub fn recognition_success_rate(&self) -> f32 {
221        let successful_crops = self.total_text_boxes.saturating_sub(self.failed_crops);
222        if successful_crops == 0 {
223            1.0
224        } else {
225            successful_crops.saturating_sub(self.failed_recognitions) as f32
226                / successful_crops as f32
227        }
228    }
229
230    /// Returns the success rate for orientation classification (0.0 to 1.0).
231    pub fn orientation_success_rate(&self) -> f32 {
232        let successful_crops = self.total_text_boxes.saturating_sub(self.failed_crops);
233        if successful_crops == 0 {
234            1.0
235        } else {
236            successful_crops.saturating_sub(self.failed_orientations) as f32
237                / successful_crops as f32
238        }
239    }
240
241    /// Returns true if there are any errors that indicate data quality issues.
242    pub fn has_quality_issues(&self) -> bool {
243        self.failed_crops > 0 || self.failed_recognitions > 0 || self.failed_orientations > 0
244    }
245}
246
247impl fmt::Display for OAROCRResult {
248    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
249        writeln!(f, "Input path: {}", self.input_path)?;
250        writeln!(f, "Page index: {}", self.index)?;
251        writeln!(
252            f,
253            "Image dimensions: [{}, {}]",
254            self.input_img.width(),
255            self.input_img.height()
256        )?;
257
258        if let Some(angle) = self.orientation_angle {
259            writeln!(f, "Orientation angle: {angle:.1}°")?;
260        } else {
261            writeln!(f, "Orientation angle: not detected")?;
262        }
263
264        writeln!(f, "Total text regions: {}", self.text_regions.len())?;
265        writeln!(f, "Recognized texts: {}", self.recognized_text_count())?;
266
267        if !self.text_regions.is_empty() {
268            writeln!(f, "Text regions (detection + recognition):")?;
269
270            // Use the new structured text regions for cleaner iteration
271            for (region_index, region) in self.text_regions.iter().enumerate() {
272                write!(f, "  Region {}: ", region_index + 1)?;
273
274                // Display bounding box
275                let bbox = &region.bounding_box;
276                if bbox.points.is_empty() {
277                    write!(f, "[] (empty)")?;
278                } else {
279                    write!(f, "[")?;
280                    for (j, point) in bbox.points.iter().enumerate() {
281                        if j == 0 {
282                            write!(f, "[{:.0}, {:.0}]", point.x, point.y)?;
283                        } else {
284                            write!(f, ", [{:.0}, {:.0}]", point.x, point.y)?;
285                        }
286                    }
287                    write!(f, "]")?;
288                }
289
290                // Display recognition result if available
291                match (&region.text, region.confidence) {
292                    (Some(text), Some(score)) => {
293                        let orientation_str = match region.orientation_angle {
294                            Some(angle) => format!(" (orientation: {angle:.1}°)"),
295                            None => String::new(),
296                        };
297                        writeln!(f, " -> '{text}' (confidence: {score:.3}){orientation_str}")?;
298                    }
299                    _ => {
300                        writeln!(f, " -> [no text recognized]")?;
301                    }
302                }
303            }
304        }
305
306        if let Some(rectified_img) = &self.rectified_img {
307            writeln!(
308                f,
309                "Rectified image: available [{} x {}]",
310                rectified_img.width(),
311                rectified_img.height()
312            )?;
313        } else {
314            writeln!(
315                f,
316                "Rectified image: not available (document unwarping not enabled)"
317            )?;
318        }
319
320        // Display error metrics if there are any quality issues
321        if self.error_metrics.has_quality_issues() {
322            writeln!(f, "Error metrics:")?;
323            writeln!(
324                f,
325                "  Failed crops: {}/{} ({:.1}% success)",
326                self.error_metrics.failed_crops,
327                self.error_metrics.total_text_boxes,
328                self.error_metrics.crop_success_rate() * 100.0
329            )?;
330            writeln!(
331                f,
332                "  Failed recognitions: {} ({:.1}% success)",
333                self.error_metrics.failed_recognitions,
334                self.error_metrics.recognition_success_rate() * 100.0
335            )?;
336            writeln!(
337                f,
338                "  Failed orientations: {} ({:.1}% success)",
339                self.error_metrics.failed_orientations,
340                self.error_metrics.orientation_success_rate() * 100.0
341            )?;
342        }
343
344        Ok(())
345    }
346}
347
#[cfg(test)]
mod tests {
    use super::*;

    /// With no text boxes at all, the crop success rate defaults to 100%.
    #[test]
    fn test_crop_success_rate_zero_total() {
        let metrics = ErrorMetrics::default();
        assert_eq!(metrics.crop_success_rate(), 1.0);
    }

    /// Boxes were detected but nothing failed — no quality issues reported.
    #[test]
    fn test_has_quality_issues_no_issues() {
        let metrics = ErrorMetrics {
            total_text_boxes: 10,
            ..Default::default()
        };
        assert!(!metrics.has_quality_issues());
    }

    /// A single crop failure is enough to flag quality issues.
    #[test]
    fn test_has_quality_issues_with_failures() {
        let metrics = ErrorMetrics {
            failed_crops: 1,
            total_text_boxes: 10,
            ..Default::default()
        };
        assert!(metrics.has_quality_issues());
    }
}