processors_rs/pdf/tesseract/
output_boxes.rs

1use crate::pdf::tesseract::error::TessResult;
2use crate::pdf::tesseract::input::{Args, Image};
3use crate::pdf::tesseract::parse_line_util::{parse_next, FromLine};
4use core::fmt;
5
6#[derive(Debug, PartialEq)]
7pub struct BoxOutput {
8    pub output: String,
9    pub boxes: Vec<Box>,
10}
11
12impl fmt::Display for BoxOutput {
13    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
14        write!(f, "{}", self.output)
15    }
16}
17
/// One entry of Tesseract `makebox` output: a recognised symbol and its bounding box.
///
/// Note that inside this module the name `Box` refers to this struct and
/// shadows the standard library's `Box<T>` from the prelude.
///
/// The type is plain data (a `String` plus integers), so it derives `Clone`
/// and `Eq` in addition to `Debug`/`PartialEq`; this lets callers duplicate
/// parsed boxes and rely on total equality.
// NOTE(review): coordinate origin and units are whatever Tesseract emits in
// its box format (presumably pixels) - confirm against the Tesseract docs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Box {
    /// Recognised symbol (the first whitespace-separated token of the line).
    pub symbol: String,
    /// Left edge of the bounding box.
    pub left: i32,
    /// Bottom edge of the bounding box.
    pub bottom: i32,
    /// Right edge of the bounding box.
    pub right: i32,
    /// Top edge of the bounding box.
    pub top: i32,
    /// Page number reported for this symbol.
    pub page: i32,
}
27
28impl fmt::Display for Box {
29    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
30        write!(
31            f,
32            "{} {} {} {} {} {}",
33            self.symbol, self.left, self.bottom, self.right, self.top, self.page
34        )
35    }
36}
37
38impl FromLine for Box {
39    fn from_line(line: &str) -> Option<Self> {
40        let mut x = line.split_whitespace();
41
42        Some(Box {
43            symbol: x.next()?.to_string(),
44            left: parse_next(&mut x)?,
45            bottom: parse_next(&mut x)?,
46            right: parse_next(&mut x)?,
47            top: parse_next(&mut x)?,
48            page: parse_next(&mut x)?,
49        })
50    }
51}
52
53pub fn image_to_boxes(image: &Image, args: &Args) -> TessResult<BoxOutput> {
54    let mut command = crate::pdf::tesseract::command::create_tesseract_command(image, args)?;
55    command.arg("makebox");
56
57    let output = crate::pdf::tesseract::command::run_tesseract_command(&mut command)?;
58    let boxes = string_to_boxes(&output)?;
59    Ok(BoxOutput { output, boxes })
60}
61
62fn string_to_boxes(output: &str) -> TessResult<Vec<Box>> {
63    output.lines().map(Box::parse).collect::<_>()
64}
65
#[cfg(test)]
mod tests {
    use crate::pdf::tesseract::{
        error::TessError,
        output_boxes::{string_to_boxes, Box},
    };

    /// A well-formed line yields a box whose fields match the tokens in order.
    #[test]
    fn test_string_to_boxes() {
        let expected = Box {
            symbol: "L".to_string(),
            left: 18,
            bottom: 26,
            right: 36,
            top: 59,
            page: 0,
        };

        let boxes = string_to_boxes("L 18 26 36 59 0").unwrap();

        assert_eq!(boxes.first(), Some(&expected));
    }

    /// A non-numeric coordinate is reported as a parse error quoting the line.
    #[test]
    fn test_string_to_boxes_parse_error() {
        let expected = TessError::ParseError("invalid line 'L 18 X 36 59 0'".into());

        let outcome = string_to_boxes("L 18 X 36 59 0");

        assert_eq!(outcome, Err(expected));
    }
}