1pub mod cmap;
2mod encoding;
3pub mod extraction;
4mod extraction_cmap;
5mod flow;
6mod font;
7pub mod font_manager;
8pub mod fonts;
9mod header_footer;
10pub mod invoice;
11mod layout;
12mod list;
13pub mod metrics;
14pub mod ocr;
15pub mod plaintext;
16pub mod structured;
17pub mod table;
18pub mod table_detection;
19pub mod validation;
20
21#[cfg(test)]
22mod cmap_tests;
23
24#[cfg(feature = "ocr-tesseract")]
25pub mod tesseract_provider;
26
27pub use encoding::TextEncoding;
28pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
29pub use flow::{TextAlign, TextFlowContext};
30pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
31pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
32pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
33pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
34pub use list::{
35 BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
36 OrderedListStyle, UnorderedList,
37};
38pub use metrics::{measure_char, measure_text, split_into_words};
39pub use ocr::{
40 CharacterConfidence, CorrectionCandidate, CorrectionReason, CorrectionSuggestion,
41 CorrectionType, FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError,
42 OcrOptions, OcrPostProcessor, OcrProcessingResult, OcrProvider, OcrRegion, OcrResult,
43 OcrTextFragment, WordConfidence,
44};
45pub use plaintext::{LineBreakMode, PlainTextConfig, PlainTextExtractor, PlainTextResult};
46pub use table::{HeaderStyle, Table, TableCell, TableOptions};
47pub use validation::{MatchType, TextMatch, TextValidationResult, TextValidator};
48
49#[cfg(feature = "ocr-tesseract")]
50pub use tesseract_provider::{RustyTesseractConfig, RustyTesseractProvider};
51
52use crate::error::Result;
53use crate::Color;
54use std::fmt::Write;
55
/// Text rendering mode for glyphs.
///
/// Each variant's discriminant is the integer operand emitted in front of the
/// `Tr` operator (the value is obtained with `mode as u8`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextRenderingMode {
    /// Mode 0: fill the glyphs.
    Fill = 0,
    /// Mode 1: stroke the glyph outlines.
    Stroke = 1,
    /// Mode 2: fill, then stroke.
    FillStroke = 2,
    /// Mode 3: neither fill nor stroke.
    Invisible = 3,
    /// Mode 4: fill and add to the clipping path.
    FillClip = 4,
    /// Mode 5: stroke and add to the clipping path.
    StrokeClip = 5,
    /// Mode 6: fill, then stroke, and add to the clipping path.
    FillStrokeClip = 6,
    /// Mode 7: only add to the clipping path.
    Clip = 7,
}
76
77#[derive(Clone)]
78pub struct TextContext {
79 operations: String,
80 current_font: Font,
81 font_size: f64,
82 text_matrix: [f64; 6],
83 pending_position: Option<(f64, f64)>,
85 character_spacing: Option<f64>,
87 word_spacing: Option<f64>,
88 horizontal_scaling: Option<f64>,
89 leading: Option<f64>,
90 text_rise: Option<f64>,
91 rendering_mode: Option<TextRenderingMode>,
92 fill_color: Option<Color>,
94 stroke_color: Option<Color>,
95}
96
97impl Default for TextContext {
98 fn default() -> Self {
99 Self::new()
100 }
101}
102
103impl TextContext {
104 pub fn new() -> Self {
105 Self {
106 operations: String::new(),
107 current_font: Font::Helvetica,
108 font_size: 12.0,
109 text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
110 pending_position: None,
111 character_spacing: None,
112 word_spacing: None,
113 horizontal_scaling: None,
114 leading: None,
115 text_rise: None,
116 rendering_mode: None,
117 fill_color: None,
118 stroke_color: None,
119 }
120 }
121
122 pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
123 self.current_font = font;
124 self.font_size = size;
125 self
126 }
127
128 #[allow(dead_code)]
130 pub(crate) fn current_font(&self) -> &Font {
131 &self.current_font
132 }
133
134 pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
135 self.text_matrix[4] = x;
137 self.text_matrix[5] = y;
138 self.pending_position = Some((x, y));
139 self
140 }
141
142 pub fn write(&mut self, text: &str) -> Result<&mut Self> {
143 self.operations.push_str("BT\n");
145
146 writeln!(
148 &mut self.operations,
149 "/{} {} Tf",
150 self.current_font.pdf_name(),
151 self.font_size
152 )
153 .expect("Writing to String should never fail");
154
155 self.apply_text_state_parameters();
157
158 let (x, y) = if let Some((px, py)) = self.pending_position.take() {
160 (px, py)
162 } else {
163 (self.text_matrix[4], self.text_matrix[5])
165 };
166
167 writeln!(&mut self.operations, "{:.2} {:.2} Td", x, y)
168 .expect("Writing to String should never fail");
169
170 match &self.current_font {
172 Font::Custom(_) => {
173 let utf16_units: Vec<u16> = text.encode_utf16().collect();
175 let mut utf16be_bytes = Vec::new();
176
177 for unit in utf16_units {
178 utf16be_bytes.push((unit >> 8) as u8); utf16be_bytes.push((unit & 0xFF) as u8); }
181
182 self.operations.push('<');
184 for &byte in &utf16be_bytes {
185 write!(&mut self.operations, "{:02X}", byte)
186 .expect("Writing to String should never fail");
187 }
188 self.operations.push_str("> Tj\n");
189 }
190 _ => {
191 let encoding = TextEncoding::WinAnsiEncoding;
193 let encoded_bytes = encoding.encode(text);
194
195 self.operations.push('(');
197 for &byte in &encoded_bytes {
198 match byte {
199 b'(' => self.operations.push_str("\\("),
200 b')' => self.operations.push_str("\\)"),
201 b'\\' => self.operations.push_str("\\\\"),
202 b'\n' => self.operations.push_str("\\n"),
203 b'\r' => self.operations.push_str("\\r"),
204 b'\t' => self.operations.push_str("\\t"),
205 0x20..=0x7E => self.operations.push(byte as char),
207 _ => write!(&mut self.operations, "\\{byte:03o}")
209 .expect("Writing to String should never fail"),
210 }
211 }
212 self.operations.push_str(") Tj\n");
213 }
214 }
215
216 self.operations.push_str("ET\n");
218
219 Ok(self)
220 }
221
222 pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
223 self.write(text)?;
224 self.text_matrix[5] -= self.font_size * 1.2; Ok(self)
226 }
227
228 pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
229 self.character_spacing = Some(spacing);
230 self
231 }
232
233 pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
234 self.word_spacing = Some(spacing);
235 self
236 }
237
238 pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
239 self.horizontal_scaling = Some(scale);
240 self
241 }
242
243 pub fn set_leading(&mut self, leading: f64) -> &mut Self {
244 self.leading = Some(leading);
245 self
246 }
247
248 pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
249 self.text_rise = Some(rise);
250 self
251 }
252
253 pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
255 self.rendering_mode = Some(mode);
256 self
257 }
258
259 pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
261 self.fill_color = Some(color);
262 self
263 }
264
265 pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
267 self.stroke_color = Some(color);
268 self
269 }
270
271 fn apply_text_state_parameters(&mut self) {
273 if let Some(spacing) = self.character_spacing {
275 writeln!(&mut self.operations, "{spacing:.2} Tc")
276 .expect("Writing to String should never fail");
277 }
278
279 if let Some(spacing) = self.word_spacing {
281 writeln!(&mut self.operations, "{spacing:.2} Tw")
282 .expect("Writing to String should never fail");
283 }
284
285 if let Some(scale) = self.horizontal_scaling {
287 writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0)
288 .expect("Writing to String should never fail");
289 }
290
291 if let Some(leading) = self.leading {
293 writeln!(&mut self.operations, "{leading:.2} TL")
294 .expect("Writing to String should never fail");
295 }
296
297 if let Some(rise) = self.text_rise {
299 writeln!(&mut self.operations, "{rise:.2} Ts")
300 .expect("Writing to String should never fail");
301 }
302
303 if let Some(mode) = self.rendering_mode {
305 writeln!(&mut self.operations, "{} Tr", mode as u8)
306 .expect("Writing to String should never fail");
307 }
308
309 if let Some(color) = self.fill_color {
311 match color {
312 Color::Rgb(r, g, b) => {
313 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} rg")
314 .expect("Writing to String should never fail");
315 }
316 Color::Gray(gray) => {
317 writeln!(&mut self.operations, "{gray:.3} g")
318 .expect("Writing to String should never fail");
319 }
320 Color::Cmyk(c, m, y, k) => {
321 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} k")
322 .expect("Writing to String should never fail");
323 }
324 }
325 }
326
327 if let Some(color) = self.stroke_color {
329 match color {
330 Color::Rgb(r, g, b) => {
331 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} RG")
332 .expect("Writing to String should never fail");
333 }
334 Color::Gray(gray) => {
335 writeln!(&mut self.operations, "{gray:.3} G")
336 .expect("Writing to String should never fail");
337 }
338 Color::Cmyk(c, m, y, k) => {
339 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} K")
340 .expect("Writing to String should never fail");
341 }
342 }
343 }
344 }
345
346 pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
347 Ok(self.operations.as_bytes().to_vec())
348 }
349
350 pub(crate) fn append_raw_operation(&mut self, operation: &str) {
355 self.operations.push_str(operation);
356 }
357
358 pub fn font_size(&self) -> f64 {
360 self.font_size
361 }
362
363 pub fn text_matrix(&self) -> [f64; 6] {
365 self.text_matrix
366 }
367
368 pub fn position(&self) -> (f64, f64) {
370 (self.text_matrix[4], self.text_matrix[5])
371 }
372
373 pub fn clear(&mut self) {
375 self.operations.clear();
376 self.character_spacing = None;
377 self.word_spacing = None;
378 self.horizontal_scaling = None;
379 self.leading = None;
380 self.text_rise = None;
381 self.rendering_mode = None;
382 self.fill_color = None;
383 self.stroke_color = None;
384 }
385
386 pub fn operations(&self) -> &str {
388 &self.operations
389 }
390
391 #[cfg(test)]
393 pub fn generate_text_state_operations(&self) -> String {
394 let mut ops = String::new();
395
396 if let Some(spacing) = self.character_spacing {
398 writeln!(&mut ops, "{spacing:.2} Tc").unwrap();
399 }
400
401 if let Some(spacing) = self.word_spacing {
403 writeln!(&mut ops, "{spacing:.2} Tw").unwrap();
404 }
405
406 if let Some(scale) = self.horizontal_scaling {
408 writeln!(&mut ops, "{:.2} Tz", scale * 100.0).unwrap();
409 }
410
411 if let Some(leading) = self.leading {
413 writeln!(&mut ops, "{leading:.2} TL").unwrap();
414 }
415
416 if let Some(rise) = self.text_rise {
418 writeln!(&mut ops, "{rise:.2} Ts").unwrap();
419 }
420
421 if let Some(mode) = self.rendering_mode {
423 writeln!(&mut ops, "{} Tr", mode as u8).unwrap();
424 }
425
426 ops
427 }
428}
429
#[cfg(test)]
mod tests {
    use super::*;

    /// Applies `configure` to a fresh context and returns the text-state
    /// operators produced by the resulting settings.
    fn state_ops_after(configure: impl FnOnce(&mut TextContext)) -> String {
        let mut ctx = TextContext::new();
        configure(&mut ctx);
        ctx.generate_text_state_operations()
    }

    /// Text-state operators produced when only the rendering mode is set.
    fn rendering_mode_ops(mode: TextRenderingMode) -> String {
        state_ops_after(|ctx| {
            ctx.set_rendering_mode(mode);
        })
    }

    // A new context starts with Helvetica 12, an identity matrix and no operators.
    #[test]
    fn test_text_context_new() {
        let ctx = TextContext::new();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
        assert_eq!(ctx.text_matrix, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
        assert!(ctx.operations.is_empty());
    }

    // `Default` yields the same font and size as `new`.
    #[test]
    fn test_text_context_default() {
        let ctx = TextContext::default();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
    }

    #[test]
    fn test_set_font() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::TimesBold, 14.0);
        assert_eq!(ctx.current_font, Font::TimesBold);
        assert_eq!(ctx.font_size, 14.0);
    }

    // `at` is visible both through `position()` and in the matrix itself.
    #[test]
    fn test_position() {
        let mut ctx = TextContext::new();
        ctx.at(100.0, 200.0);

        let (x, y) = ctx.position();
        assert_eq!(x, 100.0);
        assert_eq!(y, 200.0);
        assert_eq!(ctx.text_matrix[4], 100.0);
        assert_eq!(ctx.text_matrix[5], 200.0);
    }

    #[test]
    fn test_write_simple_text() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();

        let ops = ctx.operations();
        for fragment in ["BT\n", "ET\n", "/Helvetica 12 Tf", "(Hello) Tj"] {
            assert!(ops.contains(fragment));
        }
    }

    // Parentheses inside the text must be backslash-escaped.
    #[test]
    fn test_write_text_with_escaping() {
        let mut ctx = TextContext::new();
        ctx.write("(Hello)").unwrap();

        assert!(ctx.operations().contains("(\\(Hello\\)) Tj"));
    }

    // `write_line` lowers y by 1.2 times the font size.
    #[test]
    fn test_write_line() {
        let mut ctx = TextContext::new();
        let initial_y = ctx.text_matrix[5];
        ctx.write_line("Line 1").unwrap();

        let new_y = ctx.text_matrix[5];
        assert!(new_y < initial_y);
        assert_eq!(new_y, initial_y - 12.0 * 1.2);
    }

    #[test]
    fn test_character_spacing() {
        let ops = state_ops_after(|ctx| {
            ctx.set_character_spacing(2.5);
        });
        assert!(ops.contains("2.50 Tc"));
    }

    #[test]
    fn test_word_spacing() {
        let ops = state_ops_after(|ctx| {
            ctx.set_word_spacing(1.5);
        });
        assert!(ops.contains("1.50 Tw"));
    }

    // The factor 1.25 is emitted as a percentage.
    #[test]
    fn test_horizontal_scaling() {
        let ops = state_ops_after(|ctx| {
            ctx.set_horizontal_scaling(1.25);
        });
        assert!(ops.contains("125.00 Tz"));
    }

    #[test]
    fn test_leading() {
        let ops = state_ops_after(|ctx| {
            ctx.set_leading(15.0);
        });
        assert!(ops.contains("15.00 TL"));
    }

    #[test]
    fn test_text_rise() {
        let ops = state_ops_after(|ctx| {
            ctx.set_text_rise(3.0);
        });
        assert!(ops.contains("3.00 Ts"));
    }

    #[test]
    fn test_clear() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();
        assert!(!ctx.operations().is_empty());

        ctx.clear();
        assert!(ctx.operations().is_empty());
    }

    // The byte form must round-trip to the same text as `operations()`.
    #[test]
    fn test_generate_operations() {
        let mut ctx = TextContext::new();
        ctx.write("Test").unwrap();

        let raw = ctx.generate_operations().unwrap();
        let decoded = String::from_utf8(raw).unwrap();
        assert_eq!(decoded, ctx.operations());
    }

    #[test]
    fn test_method_chaining() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::Courier, 10.0)
            .at(50.0, 100.0)
            .set_character_spacing(1.0)
            .set_word_spacing(2.0);

        assert_eq!(ctx.current_font(), &Font::Courier);
        assert_eq!(ctx.font_size(), 10.0);
        let (x, y) = ctx.position();
        assert_eq!(x, 50.0);
        assert_eq!(y, 100.0);
    }

    #[test]
    fn test_text_matrix_access() {
        let mut ctx = TextContext::new();
        ctx.at(25.0, 75.0);

        assert_eq!(ctx.text_matrix(), [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
    }

    // Newline and tab are written as two-character escapes.
    #[test]
    fn test_special_characters_encoding() {
        let mut ctx = TextContext::new();
        ctx.write("Test\nLine\tTab").unwrap();

        let ops = ctx.operations();
        assert!(ops.contains("\\n"));
        assert!(ops.contains("\\t"));
    }

    #[test]
    fn test_rendering_mode_fill() {
        assert!(rendering_mode_ops(TextRenderingMode::Fill).contains("0 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::Stroke).contains("1 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStroke).contains("2 Tr"));
    }

    #[test]
    fn test_rendering_mode_invisible() {
        assert!(rendering_mode_ops(TextRenderingMode::Invisible).contains("3 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillClip).contains("4 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::StrokeClip).contains("5 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStrokeClip).contains("6 Tr"));
    }

    #[test]
    fn test_rendering_mode_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::Clip).contains("7 Tr"));
    }

    #[test]
    fn test_text_state_parameters_chaining() {
        let ops = state_ops_after(|ctx| {
            ctx.set_character_spacing(1.5)
                .set_word_spacing(2.0)
                .set_horizontal_scaling(1.1)
                .set_leading(14.0)
                .set_text_rise(0.5)
                .set_rendering_mode(TextRenderingMode::FillStroke);
        });

        let expected = [
            "1.50 Tc",
            "2.00 Tw",
            "110.00 Tz",
            "14.00 TL",
            "0.50 Ts",
            "2 Tr",
        ];
        for fragment in expected {
            assert!(ops.contains(fragment));
        }
    }

    // Every text-state setter must surface as its operator.
    #[test]
    fn test_all_text_state_operators_generated() {
        let ops = state_ops_after(|ctx| {
            ctx.set_character_spacing(1.0);
            ctx.set_word_spacing(2.0);
            ctx.set_horizontal_scaling(1.2);
            ctx.set_leading(15.0);
            ctx.set_text_rise(1.0);
            ctx.set_rendering_mode(TextRenderingMode::Stroke);
        });

        let checks = [
            ("Tc", "Character spacing operator (Tc) not found"),
            ("Tw", "Word spacing operator (Tw) not found"),
            ("Tz", "Horizontal scaling operator (Tz) not found"),
            ("TL", "Leading operator (TL) not found"),
            ("Ts", "Text rise operator (Ts) not found"),
            ("Tr", "Text rendering mode operator (Tr) not found"),
        ];
        for (operator, message) in checks {
            assert!(ops.contains(operator), "{}", message);
        }
    }

    // Fill and stroke colors map to the lowercase and uppercase operators.
    #[test]
    fn test_text_color_operations() {
        use crate::Color;

        let mut ctx = TextContext::new();

        // RGB fill.
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("1.000 0.000 0.000 rg"),
            "RGB fill color operator (rg) not found in: {ops}"
        );

        // RGB stroke.
        ctx.clear();
        ctx.set_stroke_color(Color::rgb(0.0, 1.0, 0.0));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("0.000 1.000 0.000 RG"),
            "RGB stroke color operator (RG) not found in: {ops}"
        );

        // Gray fill.
        ctx.clear();
        ctx.set_fill_color(Color::gray(0.5));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("0.500 g"),
            "Gray fill color operator (g) not found in: {ops}"
        );

        // CMYK stroke.
        ctx.clear();
        ctx.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("0.200 0.300 0.400 0.100 K"),
            "CMYK stroke color operator (K) not found in: {ops}"
        );

        // Fill and stroke together.
        ctx.clear();
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        ctx.set_stroke_color(Color::rgb(0.0, 0.0, 1.0));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
            "Both fill and stroke colors not found in: {ops}"
        );
    }
}