1pub mod cmap;
2mod encoding;
3mod extraction;
4mod extraction_cmap;
5mod flow;
6mod font;
7pub mod fonts;
8mod header_footer;
9mod layout;
10mod list;
11mod metrics;
12pub mod ocr;
13mod table;
14mod table_advanced;
15
16#[cfg(test)]
17mod cmap_tests;
18
19#[cfg(feature = "ocr-tesseract")]
20pub mod tesseract_provider;
21
22pub use encoding::TextEncoding;
23pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
24pub use flow::{TextAlign, TextFlowContext};
25pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
26pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
27pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
28pub use list::{
29 BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
30 OrderedListStyle, UnorderedList,
31};
32pub use metrics::{measure_char, measure_text, split_into_words};
33pub use ocr::{
34 FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError, OcrOptions,
35 OcrProcessingResult, OcrProvider, OcrResult, OcrTextFragment,
36};
37pub use table::{HeaderStyle, Table, TableCell, TableOptions};
38pub use table_advanced::{
39 AdvancedTable, AdvancedTableCell, AdvancedTableOptions, AlternatingRowColors, BorderLine,
40 BorderStyle, CellContent, CellPadding, ColumnDefinition, ColumnWidth, LineStyle, TableRow,
41 VerticalAlign,
42};
43
44use crate::error::Result;
45use std::fmt::Write;
46
/// Text rendering modes selectable through the PDF `Tr` operator.
///
/// Every variant's discriminant is the integer operand that is written in
/// front of `Tr`, so `mode as u8` yields the value to emit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextRenderingMode {
    /// Mode 0: fill the glyphs.
    Fill = 0,
    /// Mode 1: stroke the glyph outlines.
    Stroke = 1,
    /// Mode 2: fill, then stroke the glyphs.
    FillStroke = 2,
    /// Mode 3: neither fill nor stroke (invisible text).
    Invisible = 3,
    /// Mode 4: fill the glyphs and add them to the clipping path.
    FillClip = 4,
    /// Mode 5: stroke the glyphs and add them to the clipping path.
    StrokeClip = 5,
    /// Mode 6: fill, then stroke the glyphs and add them to the clipping path.
    FillStrokeClip = 6,
    /// Mode 7: only add the glyphs to the clipping path.
    Clip = 7,
}
67
68#[derive(Clone)]
69pub struct TextContext {
70 operations: String,
71 current_font: Font,
72 font_size: f64,
73 text_matrix: [f64; 6],
74 character_spacing: Option<f64>,
76 word_spacing: Option<f64>,
77 horizontal_scaling: Option<f64>,
78 leading: Option<f64>,
79 text_rise: Option<f64>,
80 rendering_mode: Option<TextRenderingMode>,
81}
82
83impl Default for TextContext {
84 fn default() -> Self {
85 Self::new()
86 }
87}
88
89impl TextContext {
90 pub fn new() -> Self {
91 Self {
92 operations: String::new(),
93 current_font: Font::Helvetica,
94 font_size: 12.0,
95 text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
96 character_spacing: None,
97 word_spacing: None,
98 horizontal_scaling: None,
99 leading: None,
100 text_rise: None,
101 rendering_mode: None,
102 }
103 }
104
105 pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
106 self.current_font = font;
107 self.font_size = size;
108 self
109 }
110
111 pub(crate) fn current_font(&self) -> &Font {
113 &self.current_font
114 }
115
116 pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
117 self.text_matrix[4] = x;
118 self.text_matrix[5] = y;
119 self
120 }
121
122 pub fn write(&mut self, text: &str) -> Result<&mut Self> {
123 self.operations.push_str("BT\n");
125
126 writeln!(
128 &mut self.operations,
129 "/{} {} Tf",
130 self.current_font.pdf_name(),
131 self.font_size
132 )
133 .unwrap();
134
135 self.apply_text_state_parameters();
137
138 writeln!(
140 &mut self.operations,
141 "{:.2} {:.2} Td",
142 self.text_matrix[4], self.text_matrix[5]
143 )
144 .unwrap();
145
146 let encoding = TextEncoding::WinAnsiEncoding;
148 let encoded_bytes = encoding.encode(text);
149
150 self.operations.push('(');
152 for &byte in &encoded_bytes {
153 match byte {
154 b'(' => self.operations.push_str("\\("),
155 b')' => self.operations.push_str("\\)"),
156 b'\\' => self.operations.push_str("\\\\"),
157 b'\n' => self.operations.push_str("\\n"),
158 b'\r' => self.operations.push_str("\\r"),
159 b'\t' => self.operations.push_str("\\t"),
160 0x20..=0x7E => self.operations.push(byte as char),
162 _ => write!(&mut self.operations, "\\{byte:03o}").unwrap(),
164 }
165 }
166 self.operations.push_str(") Tj\n");
167
168 self.operations.push_str("ET\n");
170
171 Ok(self)
172 }
173
174 pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
175 self.write(text)?;
176 self.text_matrix[5] -= self.font_size * 1.2; Ok(self)
178 }
179
180 pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
181 self.character_spacing = Some(spacing);
182 self
183 }
184
185 pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
186 self.word_spacing = Some(spacing);
187 self
188 }
189
190 pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
191 self.horizontal_scaling = Some(scale);
192 self
193 }
194
195 pub fn set_leading(&mut self, leading: f64) -> &mut Self {
196 self.leading = Some(leading);
197 self
198 }
199
200 pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
201 self.text_rise = Some(rise);
202 self
203 }
204
205 pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
207 self.rendering_mode = Some(mode);
208 self
209 }
210
211 fn apply_text_state_parameters(&mut self) {
213 if let Some(spacing) = self.character_spacing {
215 writeln!(&mut self.operations, "{:.2} Tc", spacing).unwrap();
216 }
217
218 if let Some(spacing) = self.word_spacing {
220 writeln!(&mut self.operations, "{:.2} Tw", spacing).unwrap();
221 }
222
223 if let Some(scale) = self.horizontal_scaling {
225 writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0).unwrap();
226 }
227
228 if let Some(leading) = self.leading {
230 writeln!(&mut self.operations, "{:.2} TL", leading).unwrap();
231 }
232
233 if let Some(rise) = self.text_rise {
235 writeln!(&mut self.operations, "{:.2} Ts", rise).unwrap();
236 }
237
238 if let Some(mode) = self.rendering_mode {
240 writeln!(&mut self.operations, "{} Tr", mode as u8).unwrap();
241 }
242 }
243
244 pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
245 Ok(self.operations.as_bytes().to_vec())
246 }
247
248 pub fn font_size(&self) -> f64 {
250 self.font_size
251 }
252
253 pub fn text_matrix(&self) -> [f64; 6] {
255 self.text_matrix
256 }
257
258 pub fn position(&self) -> (f64, f64) {
260 (self.text_matrix[4], self.text_matrix[5])
261 }
262
263 pub fn clear(&mut self) {
265 self.operations.clear();
266 self.character_spacing = None;
267 self.word_spacing = None;
268 self.horizontal_scaling = None;
269 self.leading = None;
270 self.text_rise = None;
271 self.rendering_mode = None;
272 }
273
274 pub fn operations(&self) -> &str {
276 &self.operations
277 }
278
279 #[cfg(test)]
281 pub fn generate_text_state_operations(&self) -> String {
282 let mut ops = String::new();
283
284 if let Some(spacing) = self.character_spacing {
286 writeln!(&mut ops, "{:.2} Tc", spacing).unwrap();
287 }
288
289 if let Some(spacing) = self.word_spacing {
291 writeln!(&mut ops, "{:.2} Tw", spacing).unwrap();
292 }
293
294 if let Some(scale) = self.horizontal_scaling {
296 writeln!(&mut ops, "{:.2} Tz", scale * 100.0).unwrap();
297 }
298
299 if let Some(leading) = self.leading {
301 writeln!(&mut ops, "{:.2} TL", leading).unwrap();
302 }
303
304 if let Some(rise) = self.text_rise {
306 writeln!(&mut ops, "{:.2} Ts", rise).unwrap();
307 }
308
309 if let Some(mode) = self.rendering_mode {
311 writeln!(&mut ops, "{} Tr", mode as u8).unwrap();
312 }
313
314 ops
315 }
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `configure` on a fresh context and returns the text state
    /// operators that context reports afterwards.
    fn text_state_ops<F>(configure: F) -> String
    where
        F: FnOnce(&mut TextContext),
    {
        let mut ctx = TextContext::new();
        configure(&mut ctx);
        ctx.generate_text_state_operations()
    }

    /// Text state operators reported after selecting `mode` on a fresh context.
    fn rendering_mode_ops(mode: TextRenderingMode) -> String {
        text_state_ops(|ctx| {
            ctx.set_rendering_mode(mode);
        })
    }

    #[test]
    fn test_text_context_new() {
        let ctx = TextContext::new();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
        assert_eq!(ctx.text_matrix, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
        assert!(ctx.operations.is_empty());
    }

    #[test]
    fn test_text_context_default() {
        let ctx = TextContext::default();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
    }

    #[test]
    fn test_set_font() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::TimesBold, 14.0);
        assert_eq!(ctx.current_font, Font::TimesBold);
        assert_eq!(ctx.font_size, 14.0);
    }

    #[test]
    fn test_position() {
        let mut ctx = TextContext::new();
        ctx.at(100.0, 200.0);

        // The accessor and the raw matrix must agree on the translation.
        assert_eq!(ctx.position(), (100.0, 200.0));
        assert_eq!(ctx.text_matrix[4], 100.0);
        assert_eq!(ctx.text_matrix[5], 200.0);
    }

    #[test]
    fn test_write_simple_text() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();

        let recorded = ctx.operations();
        assert!(recorded.contains("BT\n"));
        assert!(recorded.contains("ET\n"));
        assert!(recorded.contains("/Helvetica 12 Tf"));
        assert!(recorded.contains("(Hello) Tj"));
    }

    #[test]
    fn test_write_text_with_escaping() {
        let mut ctx = TextContext::new();
        ctx.write("(Hello)").unwrap();

        assert!(ctx.operations().contains("(\\(Hello\\)) Tj"));
    }

    #[test]
    fn test_write_line() {
        let mut ctx = TextContext::new();
        let y_before = ctx.text_matrix[5];
        ctx.write_line("Line 1").unwrap();
        let y_after = ctx.text_matrix[5];

        // One line advance is 1.2 times the default font size of 12.
        assert!(y_after < y_before);
        assert_eq!(y_after, y_before - 12.0 * 1.2);
    }

    #[test]
    fn test_character_spacing() {
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(2.5);
        });
        assert!(ops.contains("2.50 Tc"));
    }

    #[test]
    fn test_word_spacing() {
        let ops = text_state_ops(|ctx| {
            ctx.set_word_spacing(1.5);
        });
        assert!(ops.contains("1.50 Tw"));
    }

    #[test]
    fn test_horizontal_scaling() {
        let ops = text_state_ops(|ctx| {
            ctx.set_horizontal_scaling(1.25);
        });
        // The factor 1.25 is emitted as a percentage.
        assert!(ops.contains("125.00 Tz"));
    }

    #[test]
    fn test_leading() {
        let ops = text_state_ops(|ctx| {
            ctx.set_leading(15.0);
        });
        assert!(ops.contains("15.00 TL"));
    }

    #[test]
    fn test_text_rise() {
        let ops = text_state_ops(|ctx| {
            ctx.set_text_rise(3.0);
        });
        assert!(ops.contains("3.00 Ts"));
    }

    #[test]
    fn test_clear() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();
        assert!(!ctx.operations().is_empty());

        ctx.clear();
        assert!(ctx.operations().is_empty());
    }

    #[test]
    fn test_generate_operations() {
        let mut ctx = TextContext::new();
        ctx.write("Test").unwrap();

        let bytes = ctx.generate_operations().unwrap();
        let decoded = String::from_utf8(bytes).unwrap();
        assert_eq!(decoded, ctx.operations());
    }

    #[test]
    fn test_method_chaining() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::Courier, 10.0)
            .at(50.0, 100.0)
            .set_character_spacing(1.0)
            .set_word_spacing(2.0);

        assert_eq!(ctx.current_font(), &Font::Courier);
        assert_eq!(ctx.font_size(), 10.0);
        assert_eq!(ctx.position(), (50.0, 100.0));
    }

    #[test]
    fn test_text_matrix_access() {
        let mut ctx = TextContext::new();
        ctx.at(25.0, 75.0);

        assert_eq!(ctx.text_matrix(), [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
    }

    #[test]
    fn test_special_characters_encoding() {
        let mut ctx = TextContext::new();
        ctx.write("Test\nLine\tTab").unwrap();

        let recorded = ctx.operations();
        assert!(recorded.contains("\\n"));
        assert!(recorded.contains("\\t"));
    }

    #[test]
    fn test_rendering_mode_fill() {
        assert!(rendering_mode_ops(TextRenderingMode::Fill).contains("0 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::Stroke).contains("1 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStroke).contains("2 Tr"));
    }

    #[test]
    fn test_rendering_mode_invisible() {
        assert!(rendering_mode_ops(TextRenderingMode::Invisible).contains("3 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillClip).contains("4 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::StrokeClip).contains("5 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStrokeClip).contains("6 Tr"));
    }

    #[test]
    fn test_rendering_mode_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::Clip).contains("7 Tr"));
    }

    #[test]
    fn test_text_state_parameters_chaining() {
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(1.5)
                .set_word_spacing(2.0)
                .set_horizontal_scaling(1.1)
                .set_leading(14.0)
                .set_text_rise(0.5)
                .set_rendering_mode(TextRenderingMode::FillStroke);
        });

        let expected_lines = [
            "1.50 Tc",
            "2.00 Tw",
            "110.00 Tz",
            "14.00 TL",
            "0.50 Ts",
            "2 Tr",
        ];
        for expected in expected_lines.iter() {
            assert!(ops.contains(*expected));
        }
    }

    #[test]
    fn test_all_text_state_operators_generated() {
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(1.0);
            ctx.set_word_spacing(2.0);
            ctx.set_horizontal_scaling(1.2);
            ctx.set_leading(15.0);
            ctx.set_text_rise(1.0);
            ctx.set_rendering_mode(TextRenderingMode::Stroke);
        });

        // Each operator is paired with the failure message reported when it
        // is missing from the generated output.
        let required = [
            ("Tc", "Character spacing operator (Tc) not found"),
            ("Tw", "Word spacing operator (Tw) not found"),
            ("Tz", "Horizontal scaling operator (Tz) not found"),
            ("TL", "Leading operator (TL) not found"),
            ("Ts", "Text rise operator (Ts) not found"),
            ("Tr", "Text rendering mode operator (Tr) not found"),
        ];
        for &(operator, message) in required.iter() {
            assert!(ops.contains(operator), "{}", message);
        }
    }
}