use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub enum OcrBoundingGeometry {
Rectangle {
left: u32,
top: u32,
width: u32,
height: u32,
},
Quadrilateral {
points: [(u32, u32); 4],
},
}
impl OcrBoundingGeometry {
pub fn to_aabb(&self) -> (u32, u32, u32, u32) {
match self {
Self::Rectangle {
left,
top,
width,
height,
} => (*left, *top, *width, *height),
Self::Quadrilateral { points } => {
let min_x = points.iter().map(|(x, _)| *x).min().unwrap_or(0);
let max_x = points.iter().map(|(x, _)| *x).max().unwrap_or(0);
let min_y = points.iter().map(|(_, y)| *y).min().unwrap_or(0);
let max_y = points.iter().map(|(_, y)| *y).max().unwrap_or(0);
(min_x, min_y, max_x.saturating_sub(min_x), max_y.saturating_sub(min_y))
}
}
}
pub fn center(&self) -> (f64, f64) {
let (left, top, width, height) = self.to_aabb();
(left as f64 + width as f64 / 2.0, top as f64 + height as f64 / 2.0)
}
pub fn overlaps(&self, other: &Self) -> bool {
let (l1, t1, w1, h1) = self.to_aabb();
let (l2, t2, w2, h2) = other.to_aabb();
let r1 = l1 + w1;
let b1 = t1 + h1;
let r2 = l2 + w2;
let b2 = t2 + h2;
l1 < r2 && r1 > l2 && t1 < b2 && b1 > t2
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct OcrConfidence {
#[serde(skip_serializing_if = "Option::is_none", default)]
pub detection: Option<f64>,
pub recognition: f64,
}
impl OcrConfidence {
pub fn from_tesseract(confidence: f64) -> Self {
Self {
detection: None,
recognition: (confidence / 100.0).clamp(0.0, 1.0),
}
}
pub fn from_paddle(box_score: f32, text_score: f32) -> Self {
Self {
detection: Some((box_score as f64).clamp(0.0, 1.0)),
recognition: (text_score as f64).clamp(0.0, 1.0),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct OcrRotation {
pub angle_degrees: f64,
#[serde(skip_serializing_if = "Option::is_none")]
pub confidence: Option<f64>,
}
impl OcrRotation {
pub fn from_paddle(angle_index: i32, angle_score: f32) -> std::result::Result<Self, String> {
if !(0..=3).contains(&angle_index) {
return Err(format!(
"Invalid angle_index: {}. Must be 0-3 (representing 0°, 90°, 180°, 270°)",
angle_index
));
}
Ok(Self {
angle_degrees: match angle_index {
0 => 0.0,
1 => 180.0,
2 => 90.0,
3 => 270.0,
_ => unreachable!(), },
confidence: Some((angle_score as f64).clamp(0.0, 1.0)),
})
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
#[serde(rename_all = "snake_case")]
pub enum OcrElementLevel {
Word,
#[default]
Line,
Block,
Page,
}
impl OcrElementLevel {
pub fn from_tesseract_level(level: i32) -> Self {
match level {
1 => Self::Page,
2 => Self::Block,
3 => Self::Block, 4 => Self::Line,
5 => Self::Word,
_ => Self::Line,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct OcrElement {
pub text: String,
pub geometry: OcrBoundingGeometry,
pub confidence: OcrConfidence,
#[serde(default)]
pub level: OcrElementLevel,
#[serde(skip_serializing_if = "Option::is_none")]
pub rotation: Option<OcrRotation>,
#[serde(default = "default_page_number")]
pub page_number: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub parent_id: Option<String>,
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub backend_metadata: HashMap<String, serde_json::Value>,
}
fn default_page_number() -> usize {
1
}
impl OcrElement {
pub fn new(text: impl Into<String>, geometry: OcrBoundingGeometry, confidence: OcrConfidence) -> Self {
Self {
text: text.into(),
geometry,
confidence,
level: OcrElementLevel::default(),
rotation: None,
page_number: 1,
parent_id: None,
backend_metadata: HashMap::new(),
}
}
pub fn with_level(mut self, level: OcrElementLevel) -> Self {
self.level = level;
self
}
pub fn with_rotation(mut self, rotation: OcrRotation) -> Self {
self.rotation = Some(rotation);
self
}
pub fn with_page_number(mut self, page_number: usize) -> Self {
self.page_number = page_number;
self
}
pub fn with_parent_id(mut self, parent_id: impl Into<String>) -> Self {
self.parent_id = Some(parent_id.into());
self
}
pub fn with_metadata(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
self.backend_metadata.insert(key.into(), value);
self
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[cfg_attr(feature = "api", derive(utoipa::ToSchema))]
pub struct OcrElementConfig {
#[serde(default)]
pub include_elements: bool,
#[serde(default)]
pub min_level: OcrElementLevel,
#[serde(default)]
pub min_confidence: f64,
#[serde(default)]
pub build_hierarchy: bool,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_rectangle_to_aabb() {
let geom = OcrBoundingGeometry::Rectangle {
left: 10,
top: 20,
width: 100,
height: 50,
};
assert_eq!(geom.to_aabb(), (10, 20, 100, 50));
}
#[test]
fn test_quadrilateral_to_aabb() {
let geom = OcrBoundingGeometry::Quadrilateral {
points: [(10, 22), (108, 20), (110, 72), (12, 74)],
};
let (left, top, width, height) = geom.to_aabb();
assert_eq!(left, 10);
assert_eq!(top, 20);
assert_eq!(width, 100);
assert_eq!(height, 54);
}
#[test]
fn test_confidence_from_tesseract() {
let conf = OcrConfidence::from_tesseract(85.0);
assert!(conf.detection.is_none());
assert!((conf.recognition - 0.85).abs() < 0.001);
}
#[test]
fn test_confidence_from_paddle() {
let conf = OcrConfidence::from_paddle(0.95, 0.88);
assert!(conf.detection.is_some());
assert!((conf.detection.unwrap() - 0.95).abs() < 0.001);
assert!((conf.recognition - 0.88).abs() < 0.001);
}
#[test]
fn test_rotation_from_paddle() {
let rot = OcrRotation::from_paddle(1, 0.92).expect("Valid angle_index");
assert_eq!(rot.angle_degrees, 180.0);
assert!(rot.confidence.is_some());
assert!((rot.confidence.unwrap() - 0.92).abs() < 0.001);
}
#[test]
fn test_rotation_from_paddle_invalid_angle_index() {
assert!(OcrRotation::from_paddle(-1, 0.92).is_err());
assert!(OcrRotation::from_paddle(4, 0.92).is_err());
assert!(OcrRotation::from_paddle(100, 0.92).is_err());
assert!(OcrRotation::from_paddle(0, 0.92).is_ok());
assert!(OcrRotation::from_paddle(1, 0.92).is_ok());
assert!(OcrRotation::from_paddle(2, 0.92).is_ok());
assert!(OcrRotation::from_paddle(3, 0.92).is_ok());
}
#[test]
fn test_element_level_from_tesseract() {
assert_eq!(OcrElementLevel::from_tesseract_level(1), OcrElementLevel::Page);
assert_eq!(OcrElementLevel::from_tesseract_level(2), OcrElementLevel::Block);
assert_eq!(OcrElementLevel::from_tesseract_level(3), OcrElementLevel::Block);
assert_eq!(OcrElementLevel::from_tesseract_level(4), OcrElementLevel::Line);
assert_eq!(OcrElementLevel::from_tesseract_level(5), OcrElementLevel::Word);
}
#[test]
fn test_ocr_element_builder() {
let geom = OcrBoundingGeometry::Rectangle {
left: 0,
top: 0,
width: 100,
height: 20,
};
let conf = OcrConfidence::from_tesseract(90.0);
let element = OcrElement::new("Hello", geom, conf)
.with_level(OcrElementLevel::Word)
.with_page_number(2)
.with_metadata("backend", serde_json::json!("tesseract"));
assert_eq!(element.text, "Hello");
assert_eq!(element.level, OcrElementLevel::Word);
assert_eq!(element.page_number, 2);
assert!(element.backend_metadata.contains_key("backend"));
}
#[test]
fn test_geometry_overlaps() {
let geom1 = OcrBoundingGeometry::Rectangle {
left: 0,
top: 0,
width: 100,
height: 50,
};
let geom2 = OcrBoundingGeometry::Rectangle {
left: 50,
top: 25,
width: 100,
height: 50,
};
let geom3 = OcrBoundingGeometry::Rectangle {
left: 200,
top: 0,
width: 50,
height: 50,
};
assert!(geom1.overlaps(&geom2));
assert!(!geom1.overlaps(&geom3));
}
#[test]
fn test_geometry_center() {
let geom = OcrBoundingGeometry::Rectangle {
left: 0,
top: 0,
width: 100,
height: 50,
};
let (cx, cy) = geom.center();
assert!((cx - 50.0).abs() < 0.001);
assert!((cy - 25.0).abs() < 0.001);
}
#[test]
fn test_serialization_roundtrip() {
let geom = OcrBoundingGeometry::Quadrilateral {
points: [(10, 20), (100, 22), (98, 70), (8, 68)],
};
let conf = OcrConfidence::from_paddle(0.95, 0.88);
let rot = OcrRotation::from_paddle(0, 0.99).expect("Valid angle_index");
let element = OcrElement::new("Test text", geom, conf)
.with_rotation(rot)
.with_level(OcrElementLevel::Line);
let json = serde_json::to_string(&element).expect("Failed to serialize");
let deserialized: OcrElement = serde_json::from_str(&json).expect("Failed to deserialize");
assert_eq!(deserialized.text, element.text);
assert_eq!(deserialized.level, element.level);
assert!(deserialized.rotation.is_some());
}
}