// processors_rs/pdf/tesseract/output_boxes.rs
use crate::pdf::tesseract::error::TessResult;
use crate::pdf::tesseract::input::{Args, Image};
use crate::pdf::tesseract::parse_line_util::{parse_next, FromLine};
use core::fmt;

6#[derive(Debug, PartialEq)]
7pub struct BoxOutput {
8 pub output: String,
9 pub boxes: Vec<Box>,
10}
12impl fmt::Display for BoxOutput {
13 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
14 write!(f, "{}", self.output)
15 }
16}
/// One line of Tesseract `makebox` output: a recognised symbol and its
/// bounding box.
///
/// The field order mirrors the textual layout
/// `symbol left bottom right top page`.
///
/// NOTE(review): Tesseract box files are usually described as using pixel
/// coordinates measured from the bottom-left corner of the image — confirm
/// against the Tesseract documentation before relying on it.
///
/// Inside this module the name shadows the prelude's `std::boxed::Box`; spell
/// the heap pointer with its full path if it is ever needed here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Box {
    /// First whitespace-delimited token of the line.
    pub symbol: String,
    /// Left edge of the bounding box.
    pub left: i32,
    /// Bottom edge of the bounding box.
    pub bottom: i32,
    /// Right edge of the bounding box.
    pub right: i32,
    /// Top edge of the bounding box.
    pub top: i32,
    /// Page number reported for the symbol.
    pub page: i32,
}
28impl fmt::Display for Box {
29 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
30 write!(
31 f,
32 "{} {} {} {} {} {}",
33 self.symbol, self.left, self.bottom, self.right, self.top, self.page
34 )
35 }
36}
38impl FromLine for Box {
39 fn from_line(line: &str) -> Option<Self> {
40 let mut x = line.split_whitespace();
41
42 Some(Box {
43 symbol: x.next()?.to_string(),
44 left: parse_next(&mut x)?,
45 bottom: parse_next(&mut x)?,
46 right: parse_next(&mut x)?,
47 top: parse_next(&mut x)?,
48 page: parse_next(&mut x)?,
49 })
50 }
51}
53pub fn image_to_boxes(image: &Image, args: &Args) -> TessResult<BoxOutput> {
54 let mut command = crate::pdf::tesseract::command::create_tesseract_command(image, args)?;
55 command.arg("makebox");
56
57 let output = crate::pdf::tesseract::command::run_tesseract_command(&mut command)?;
58 let boxes = string_to_boxes(&output)?;
59 Ok(BoxOutput { output, boxes })
60}
62fn string_to_boxes(output: &str) -> TessResult<Vec<Box>> {
63 output.lines().map(Box::parse).collect::<_>()
64}
#[cfg(test)]
mod tests {
    use crate::pdf::tesseract::{
        error::TessError,
        output_boxes::{string_to_boxes, Box},
    };

    /// A well-formed line produces a `Box` with every field populated.
    #[test]
    fn test_string_to_boxes() {
        let result = string_to_boxes("L 18 26 36 59 0");
        assert_eq!(
            *result.unwrap().first().unwrap(),
            Box {
                symbol: String::from("L"),
                left: 18,
                bottom: 26,
                right: 36,
                top: 59,
                page: 0
            }
        );
    }

    /// A non-numeric coordinate is reported as a parse error naming the line.
    #[test]
    fn test_string_to_boxes_parse_error() {
        let result = string_to_boxes("L 18 X 36 59 0");
        assert_eq!(
            result,
            Err(TessError::ParseError(
                "invalid line 'L 18 X 36 59 0'".into()
            ))
        );
    }

    /// Input without any lines parses to an empty list rather than an error.
    #[test]
    fn test_string_to_boxes_empty_input() {
        assert_eq!(string_to_boxes(""), Ok(vec![]));
    }

    /// Rendering a parsed box reproduces a canonically spaced input line.
    #[test]
    fn test_box_display_round_trip() {
        let line = "L 18 26 36 59 0";
        let boxes = string_to_boxes(line).unwrap();
        assert_eq!(boxes.len(), 1);
        assert_eq!(boxes[0].to_string(), line);
    }
}