Skip to main content

rlx_ocr/
text.rs

1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16//! Recognized text items with character-level bounding boxes.
17
18use rten_imageproc::{BoundingRect, Rect, RotatedRect};
19
20/// A single recognized character with its axis-aligned bounding box.
21#[derive(Clone, Debug, PartialEq)]
22pub struct TextChar {
23    pub char: char,
24    pub rect: Rect,
25}
26
27/// A word composed of one or more characters.
28#[derive(Clone, Debug, Default, PartialEq)]
29pub struct TextWord {
30    pub chars: Vec<TextChar>,
31}
32
33impl TextWord {
34    pub fn text(&self) -> String {
35        self.chars.iter().map(|c| c.char).collect()
36    }
37}
38
39/// A line of text composed of words.
40#[derive(Clone, Debug, Default, PartialEq)]
41pub struct TextLine {
42    pub words: Vec<TextWord>,
43}
44
45impl TextLine {
46    pub fn new(chars: Vec<TextChar>) -> Self {
47        Self {
48            words: vec![TextWord { chars }],
49        }
50    }
51
52    pub fn text(&self) -> String {
53        self.words
54            .iter()
55            .map(TextWord::text)
56            .collect::<Vec<_>>()
57            .join(" ")
58    }
59}
60
61/// Any recognized text item (line, word, or character).
62#[derive(Clone, Debug, PartialEq)]
63pub enum TextItem {
64    Line(TextLine),
65    Word(TextWord),
66    Char(TextChar),
67}
68
69impl TextItem {
70    pub fn as_line(&self) -> Option<&TextLine> {
71        match self {
72            Self::Line(l) => Some(l),
73            _ => None,
74        }
75    }
76}
77
78impl std::fmt::Display for TextLine {
79    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
80        write!(f, "{}", self.text())
81    }
82}
83
84/// Convenience: bounding box union of word rects in a line.
85pub fn line_bounding_rect(words: &[RotatedRect]) -> Option<Rect> {
86    words.iter().fold(None, |br: Option<Rect>, r| match br {
87        Some(br) => Some(br.union(r.bounding_rect().integral_bounding_rect())),
88        None => Some(r.bounding_rect().integral_bounding_rect()),
89    })
90}