1mod encoding;
2mod extraction;
3mod flow;
4mod font;
5mod metrics;
6pub mod ocr;
7
8#[cfg(feature = "ocr-tesseract")]
9pub mod tesseract_provider;
10
11pub use encoding::TextEncoding;
12pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
13pub use flow::{TextAlign, TextFlowContext};
14pub use font::{Font, FontFamily};
15pub use metrics::{measure_char, measure_text, split_into_words};
16pub use ocr::{
17 FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError, OcrOptions,
18 OcrProcessingResult, OcrProvider, OcrResult, OcrTextFragment,
19};
20
21use crate::error::Result;
22use std::fmt::Write;
23
24#[derive(Clone)]
25pub struct TextContext {
26 operations: String,
27 current_font: Font,
28 font_size: f64,
29 text_matrix: [f64; 6],
30}
31
32impl Default for TextContext {
33 fn default() -> Self {
34 Self::new()
35 }
36}
37
38impl TextContext {
39 pub fn new() -> Self {
40 Self {
41 operations: String::new(),
42 current_font: Font::Helvetica,
43 font_size: 12.0,
44 text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
45 }
46 }
47
48 pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
49 self.current_font = font;
50 self.font_size = size;
51 self
52 }
53
54 pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
55 self.text_matrix[4] = x;
56 self.text_matrix[5] = y;
57 self
58 }
59
60 pub fn write(&mut self, text: &str) -> Result<&mut Self> {
61 self.operations.push_str("BT\n");
63
64 writeln!(
66 &mut self.operations,
67 "/{} {} Tf",
68 self.current_font.pdf_name(),
69 self.font_size
70 )
71 .unwrap();
72
73 writeln!(
75 &mut self.operations,
76 "{:.2} {:.2} Td",
77 self.text_matrix[4], self.text_matrix[5]
78 )
79 .unwrap();
80
81 let encoding = TextEncoding::WinAnsiEncoding;
83 let encoded_bytes = encoding.encode(text);
84
85 self.operations.push('(');
87 for &byte in &encoded_bytes {
88 match byte {
89 b'(' => self.operations.push_str("\\("),
90 b')' => self.operations.push_str("\\)"),
91 b'\\' => self.operations.push_str("\\\\"),
92 b'\n' => self.operations.push_str("\\n"),
93 b'\r' => self.operations.push_str("\\r"),
94 b'\t' => self.operations.push_str("\\t"),
95 0x20..=0x7E => self.operations.push(byte as char),
97 _ => write!(&mut self.operations, "\\{byte:03o}").unwrap(),
99 }
100 }
101 self.operations.push_str(") Tj\n");
102
103 self.operations.push_str("ET\n");
105
106 Ok(self)
107 }
108
109 pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
110 self.write(text)?;
111 self.text_matrix[5] -= self.font_size * 1.2; Ok(self)
113 }
114
115 pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
116 writeln!(&mut self.operations, "{spacing:.2} Tc").unwrap();
117 self
118 }
119
120 pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
121 writeln!(&mut self.operations, "{spacing:.2} Tw").unwrap();
122 self
123 }
124
125 pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
126 writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0).unwrap();
127 self
128 }
129
130 pub fn set_leading(&mut self, leading: f64) -> &mut Self {
131 writeln!(&mut self.operations, "{leading:.2} TL").unwrap();
132 self
133 }
134
135 pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
136 writeln!(&mut self.operations, "{rise:.2} Ts").unwrap();
137 self
138 }
139
140 pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
141 Ok(self.operations.as_bytes().to_vec())
142 }
143
144 pub fn current_font(&self) -> Font {
146 self.current_font
147 }
148
149 pub fn font_size(&self) -> f64 {
151 self.font_size
152 }
153
154 pub fn text_matrix(&self) -> [f64; 6] {
156 self.text_matrix
157 }
158
159 pub fn position(&self) -> (f64, f64) {
161 (self.text_matrix[4], self.text_matrix[5])
162 }
163
164 pub fn clear(&mut self) {
166 self.operations.clear();
167 }
168
169 pub fn operations(&self) -> &str {
171 &self.operations
172 }
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Text matrix a freshly constructed context is expected to carry.
    const IDENTITY_MATRIX: [f64; 6] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0];

    /// Runs `setup` against a fresh context and returns the operators it recorded.
    fn ops_after(setup: impl FnOnce(&mut TextContext)) -> String {
        let mut ctx = TextContext::new();
        setup(&mut ctx);
        ctx.operations().to_owned()
    }

    /// `new` starts with Helvetica 12, the identity matrix and no operators.
    #[test]
    fn test_text_context_new() {
        let ctx = TextContext::new();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
        assert_eq!(ctx.text_matrix, IDENTITY_MATRIX);
        assert!(ctx.operations.is_empty());
    }

    /// `Default` yields the same font and size as `new`.
    #[test]
    fn test_text_context_default() {
        let ctx = TextContext::default();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
    }

    /// `set_font` stores both the font and the size.
    #[test]
    fn test_set_font() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::TimesBold, 14.0);
        assert_eq!(ctx.current_font, Font::TimesBold);
        assert_eq!(ctx.font_size, 14.0);
    }

    /// `at` is reflected both by `position` and by matrix entries 4 and 5.
    #[test]
    fn test_position() {
        let mut ctx = TextContext::new();
        ctx.at(100.0, 200.0);

        let (x, y) = ctx.position();
        assert_eq!(x, 100.0);
        assert_eq!(y, 200.0);
        assert_eq!(ctx.text_matrix[4], 100.0);
        assert_eq!(ctx.text_matrix[5], 200.0);
    }

    /// A plain write produces a full text object with font and string.
    #[test]
    fn test_write_simple_text() {
        let ops = ops_after(|ctx| {
            ctx.write("Hello").unwrap();
        });

        for expected in ["BT\n", "ET\n", "/Helvetica 12 Tf", "(Hello) Tj"] {
            assert!(ops.contains(expected));
        }
    }

    /// Parentheses inside the text are backslash-escaped.
    #[test]
    fn test_write_text_with_escaping() {
        let ops = ops_after(|ctx| {
            ctx.write("(Hello)").unwrap();
        });

        assert!(ops.contains("(\\(Hello\\)) Tj"));
    }

    /// `write_line` lowers y by 1.2 times the font size.
    #[test]
    fn test_write_line() {
        let mut ctx = TextContext::new();
        let initial_y = ctx.text_matrix[5];

        ctx.write_line("Line 1").unwrap();

        let new_y = ctx.text_matrix[5];
        assert!(new_y < initial_y);
        assert_eq!(new_y, initial_y - 12.0 * 1.2);
    }

    /// Character spacing is written with two decimals and the `Tc` operator.
    #[test]
    fn test_character_spacing() {
        let ops = ops_after(|ctx| {
            ctx.set_character_spacing(2.5);
        });

        assert!(ops.contains("2.50 Tc"));
    }

    /// Word spacing is written with two decimals and the `Tw` operator.
    #[test]
    fn test_word_spacing() {
        let ops = ops_after(|ctx| {
            ctx.set_word_spacing(1.5);
        });

        assert!(ops.contains("1.50 Tw"));
    }

    /// Horizontal scaling is emitted as a percentage.
    #[test]
    fn test_horizontal_scaling() {
        let ops = ops_after(|ctx| {
            ctx.set_horizontal_scaling(1.25);
        });

        assert!(ops.contains("125.00 Tz"));
    }

    /// Leading is written with two decimals and the `TL` operator.
    #[test]
    fn test_leading() {
        let ops = ops_after(|ctx| {
            ctx.set_leading(15.0);
        });

        assert!(ops.contains("15.00 TL"));
    }

    /// Text rise is written with two decimals and the `Ts` operator.
    #[test]
    fn test_text_rise() {
        let ops = ops_after(|ctx| {
            ctx.set_text_rise(3.0);
        });

        assert!(ops.contains("3.00 Ts"));
    }

    /// `clear` empties a non-empty operator buffer.
    #[test]
    fn test_clear() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();
        assert!(!ctx.operations().is_empty());

        ctx.clear();
        assert!(ctx.operations().is_empty());
    }

    /// The generated bytes decode to exactly the recorded operator text.
    #[test]
    fn test_generate_operations() {
        let mut ctx = TextContext::new();
        ctx.write("Test").unwrap();

        let ops_bytes = ctx.generate_operations().unwrap();
        let ops_string = String::from_utf8(ops_bytes).unwrap();
        assert_eq!(ops_string, ctx.operations());
    }

    /// Builder methods chain and leave the expected state behind.
    #[test]
    fn test_method_chaining() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::Courier, 10.0)
            .at(50.0, 100.0)
            .set_character_spacing(1.0)
            .set_word_spacing(2.0);

        assert_eq!(ctx.current_font(), Font::Courier);
        assert_eq!(ctx.font_size(), 10.0);

        let (x, y) = ctx.position();
        assert_eq!(x, 50.0);
        assert_eq!(y, 100.0);
    }

    /// `text_matrix` exposes the full matrix including the position set by `at`.
    #[test]
    fn test_text_matrix_access() {
        let mut ctx = TextContext::new();
        ctx.at(25.0, 75.0);

        let matrix = ctx.text_matrix();
        assert_eq!(matrix, [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
    }

    /// Newline and tab characters are emitted as escape sequences.
    #[test]
    fn test_special_characters_encoding() {
        let ops = ops_after(|ctx| {
            ctx.write("Test\nLine\tTab").unwrap();
        });

        assert!(ops.contains("\\n"));
        assert!(ops.contains("\\t"));
    }
}