1mod encoding;
2mod extraction;
3mod flow;
4mod font;
5mod metrics;
6pub mod ocr;
7
8#[cfg(feature = "ocr-tesseract")]
9pub mod tesseract_provider;
10
11pub use encoding::TextEncoding;
12pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
13pub use flow::{TextAlign, TextFlowContext};
14pub use font::{Font, FontFamily};
15pub use metrics::{measure_char, measure_text, split_into_words};
16pub use ocr::{
17 FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError, OcrOptions,
18 OcrProcessingResult, OcrProvider, OcrResult, OcrTextFragment,
19};
20
21use crate::error::Result;
22use std::fmt::Write;
23
24#[derive(Clone)]
25pub struct TextContext {
26 operations: String,
27 current_font: Font,
28 font_size: f64,
29 text_matrix: [f64; 6],
30}
31
32impl Default for TextContext {
33 fn default() -> Self {
34 Self::new()
35 }
36}
37
38impl TextContext {
39 pub fn new() -> Self {
40 Self {
41 operations: String::new(),
42 current_font: Font::Helvetica,
43 font_size: 12.0,
44 text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
45 }
46 }
47
48 pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
49 self.current_font = font;
50 self.font_size = size;
51 self
52 }
53
54 pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
55 self.text_matrix[4] = x;
56 self.text_matrix[5] = y;
57 self
58 }
59
60 pub fn write(&mut self, text: &str) -> Result<&mut Self> {
61 self.operations.push_str("BT\n");
63
64 writeln!(
66 &mut self.operations,
67 "/{} {} Tf",
68 self.current_font.pdf_name(),
69 self.font_size
70 )
71 .unwrap();
72
73 writeln!(
75 &mut self.operations,
76 "{:.2} {:.2} Td",
77 self.text_matrix[4], self.text_matrix[5]
78 )
79 .unwrap();
80
81 let encoding = TextEncoding::WinAnsiEncoding;
83 let encoded_bytes = encoding.encode(text);
84
85 self.operations.push('(');
87 for &byte in &encoded_bytes {
88 match byte {
89 b'(' => self.operations.push_str("\\("),
90 b')' => self.operations.push_str("\\)"),
91 b'\\' => self.operations.push_str("\\\\"),
92 b'\n' => self.operations.push_str("\\n"),
93 b'\r' => self.operations.push_str("\\r"),
94 b'\t' => self.operations.push_str("\\t"),
95 0x20..=0x7E => self.operations.push(byte as char),
97 _ => write!(&mut self.operations, "\\{byte:03o}").unwrap(),
99 }
100 }
101 self.operations.push_str(") Tj\n");
102
103 self.operations.push_str("ET\n");
105
106 Ok(self)
107 }
108
109 pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
110 self.write(text)?;
111 self.text_matrix[5] -= self.font_size * 1.2; Ok(self)
113 }
114
115 pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
116 writeln!(&mut self.operations, "{spacing:.2} Tc").unwrap();
117 self
118 }
119
120 pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
121 writeln!(&mut self.operations, "{spacing:.2} Tw").unwrap();
122 self
123 }
124
125 pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
126 writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0).unwrap();
127 self
128 }
129
130 pub fn set_leading(&mut self, leading: f64) -> &mut Self {
131 writeln!(&mut self.operations, "{leading:.2} TL").unwrap();
132 self
133 }
134
135 pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
136 writeln!(&mut self.operations, "{rise:.2} Ts").unwrap();
137 self
138 }
139
140 pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
141 Ok(self.operations.as_bytes().to_vec())
142 }
143}