1pub mod cmap;
2mod encoding;
3mod extraction;
4mod extraction_cmap;
5mod flow;
6mod font;
7pub mod font_manager;
8pub mod fonts;
9mod header_footer;
10mod layout;
11mod list;
12pub mod metrics;
13pub mod ocr;
14pub mod table;
15pub mod validation;
16
17#[cfg(test)]
18mod cmap_tests;
19
20#[cfg(feature = "ocr-tesseract")]
21pub mod tesseract_provider;
22
23pub use encoding::TextEncoding;
24pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
25pub use flow::{TextAlign, TextFlowContext};
26pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
27pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
28pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
29pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
30pub use list::{
31 BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
32 OrderedListStyle, UnorderedList,
33};
34pub use metrics::{measure_char, measure_text, split_into_words};
35pub use ocr::{
36 CharacterConfidence, CorrectionCandidate, CorrectionReason, CorrectionSuggestion,
37 CorrectionType, FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError,
38 OcrOptions, OcrPostProcessor, OcrProcessingResult, OcrProvider, OcrRegion, OcrResult,
39 OcrTextFragment, WordConfidence,
40};
41pub use table::{HeaderStyle, Table, TableCell, TableOptions};
42pub use validation::{MatchType, TextMatch, TextValidationResult, TextValidator};
43
44#[cfg(feature = "ocr-tesseract")]
45pub use tesseract_provider::{RustyTesseractConfig, RustyTesseractProvider};
46
47use crate::error::Result;
48use crate::Color;
49use std::fmt::Write;
50
/// Text rendering modes that can be selected with the PDF `Tr` operator.
///
/// The discriminant of each variant is the integer operand written in front
/// of `Tr` when the mode is emitted (the value is obtained with `as u8`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextRenderingMode {
    /// Mode 0: fill the glyphs.
    Fill = 0,
    /// Mode 1: stroke the glyph outlines.
    Stroke = 1,
    /// Mode 2: fill the glyphs, then stroke them.
    FillStroke = 2,
    /// Mode 3: neither fill nor stroke (invisible text).
    Invisible = 3,
    /// Mode 4: fill the glyphs and add them to the clipping path.
    FillClip = 4,
    /// Mode 5: stroke the glyphs and add them to the clipping path.
    StrokeClip = 5,
    /// Mode 6: fill, then stroke, and add the glyphs to the clipping path.
    FillStrokeClip = 6,
    /// Mode 7: only add the glyphs to the clipping path.
    Clip = 7,
}
71
/// Accumulates PDF text operators (`BT` … `ET` sequences) as a string.
///
/// Font, position and the optional text-state parameters are configured
/// through the setter methods; each call to `write` appends one complete
/// text object to `operations` using the state held here.
#[derive(Clone)]
pub struct TextContext {
    /// Operator text produced so far; emptied by `clear`.
    operations: String,
    /// Font whose PDF name is written with the `Tf` operator.
    current_font: Font,
    /// Font size written with `Tf`; also scales the line advance of `write_line`.
    font_size: f64,
    /// Six-element matrix initialised to `[1, 0, 0, 1, 0, 0]`. Only entries 4
    /// and 5 (x and y) are modified by the methods in this file.
    text_matrix: [f64; 6],
    /// Position recorded by `at` and consumed by the next `write`.
    pending_position: Option<(f64, f64)>,
    /// Operand for `Tc`; emitted only when set.
    character_spacing: Option<f64>,
    /// Operand for `Tw`; emitted only when set.
    word_spacing: Option<f64>,
    /// Scale factor for `Tz`; multiplied by 100 when emitted.
    horizontal_scaling: Option<f64>,
    /// Operand for `TL`; emitted only when set.
    leading: Option<f64>,
    /// Operand for `Ts`; emitted only when set.
    text_rise: Option<f64>,
    /// Mode for `Tr`; emitted as its integer discriminant when set.
    rendering_mode: Option<TextRenderingMode>,
    /// Color emitted with the lowercase operators `rg` / `g` / `k` when set.
    fill_color: Option<Color>,
    /// Color emitted with the uppercase operators `RG` / `G` / `K` when set.
    stroke_color: Option<Color>,
}
91
92impl Default for TextContext {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98impl TextContext {
99 pub fn new() -> Self {
100 Self {
101 operations: String::new(),
102 current_font: Font::Helvetica,
103 font_size: 12.0,
104 text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
105 pending_position: None,
106 character_spacing: None,
107 word_spacing: None,
108 horizontal_scaling: None,
109 leading: None,
110 text_rise: None,
111 rendering_mode: None,
112 fill_color: None,
113 stroke_color: None,
114 }
115 }
116
117 pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
118 self.current_font = font;
119 self.font_size = size;
120 self
121 }
122
123 #[allow(dead_code)]
125 pub(crate) fn current_font(&self) -> &Font {
126 &self.current_font
127 }
128
129 pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
130 self.text_matrix[4] = x;
132 self.text_matrix[5] = y;
133 self.pending_position = Some((x, y));
134 self
135 }
136
137 pub fn write(&mut self, text: &str) -> Result<&mut Self> {
138 self.operations.push_str("BT\n");
140
141 writeln!(
143 &mut self.operations,
144 "/{} {} Tf",
145 self.current_font.pdf_name(),
146 self.font_size
147 )
148 .expect("Writing to String should never fail");
149
150 self.apply_text_state_parameters();
152
153 let (x, y) = if let Some((px, py)) = self.pending_position.take() {
155 (px, py)
157 } else {
158 (self.text_matrix[4], self.text_matrix[5])
160 };
161
162 writeln!(&mut self.operations, "{:.2} {:.2} Td", x, y)
163 .expect("Writing to String should never fail");
164
165 match &self.current_font {
167 Font::Custom(_) => {
168 let utf16_units: Vec<u16> = text.encode_utf16().collect();
170 let mut utf16be_bytes = Vec::new();
171
172 for unit in utf16_units {
173 utf16be_bytes.push((unit >> 8) as u8); utf16be_bytes.push((unit & 0xFF) as u8); }
176
177 self.operations.push('<');
179 for &byte in &utf16be_bytes {
180 write!(&mut self.operations, "{:02X}", byte)
181 .expect("Writing to String should never fail");
182 }
183 self.operations.push_str("> Tj\n");
184 }
185 _ => {
186 let encoding = TextEncoding::WinAnsiEncoding;
188 let encoded_bytes = encoding.encode(text);
189
190 self.operations.push('(');
192 for &byte in &encoded_bytes {
193 match byte {
194 b'(' => self.operations.push_str("\\("),
195 b')' => self.operations.push_str("\\)"),
196 b'\\' => self.operations.push_str("\\\\"),
197 b'\n' => self.operations.push_str("\\n"),
198 b'\r' => self.operations.push_str("\\r"),
199 b'\t' => self.operations.push_str("\\t"),
200 0x20..=0x7E => self.operations.push(byte as char),
202 _ => write!(&mut self.operations, "\\{byte:03o}")
204 .expect("Writing to String should never fail"),
205 }
206 }
207 self.operations.push_str(") Tj\n");
208 }
209 }
210
211 self.operations.push_str("ET\n");
213
214 Ok(self)
215 }
216
217 pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
218 self.write(text)?;
219 self.text_matrix[5] -= self.font_size * 1.2; Ok(self)
221 }
222
223 pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
224 self.character_spacing = Some(spacing);
225 self
226 }
227
228 pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
229 self.word_spacing = Some(spacing);
230 self
231 }
232
233 pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
234 self.horizontal_scaling = Some(scale);
235 self
236 }
237
238 pub fn set_leading(&mut self, leading: f64) -> &mut Self {
239 self.leading = Some(leading);
240 self
241 }
242
243 pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
244 self.text_rise = Some(rise);
245 self
246 }
247
248 pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
250 self.rendering_mode = Some(mode);
251 self
252 }
253
254 pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
256 self.fill_color = Some(color);
257 self
258 }
259
260 pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
262 self.stroke_color = Some(color);
263 self
264 }
265
266 fn apply_text_state_parameters(&mut self) {
268 if let Some(spacing) = self.character_spacing {
270 writeln!(&mut self.operations, "{spacing:.2} Tc")
271 .expect("Writing to String should never fail");
272 }
273
274 if let Some(spacing) = self.word_spacing {
276 writeln!(&mut self.operations, "{spacing:.2} Tw")
277 .expect("Writing to String should never fail");
278 }
279
280 if let Some(scale) = self.horizontal_scaling {
282 writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0)
283 .expect("Writing to String should never fail");
284 }
285
286 if let Some(leading) = self.leading {
288 writeln!(&mut self.operations, "{leading:.2} TL")
289 .expect("Writing to String should never fail");
290 }
291
292 if let Some(rise) = self.text_rise {
294 writeln!(&mut self.operations, "{rise:.2} Ts")
295 .expect("Writing to String should never fail");
296 }
297
298 if let Some(mode) = self.rendering_mode {
300 writeln!(&mut self.operations, "{} Tr", mode as u8)
301 .expect("Writing to String should never fail");
302 }
303
304 if let Some(color) = self.fill_color {
306 match color {
307 Color::Rgb(r, g, b) => {
308 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} rg")
309 .expect("Writing to String should never fail");
310 }
311 Color::Gray(gray) => {
312 writeln!(&mut self.operations, "{gray:.3} g")
313 .expect("Writing to String should never fail");
314 }
315 Color::Cmyk(c, m, y, k) => {
316 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} k")
317 .expect("Writing to String should never fail");
318 }
319 }
320 }
321
322 if let Some(color) = self.stroke_color {
324 match color {
325 Color::Rgb(r, g, b) => {
326 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} RG")
327 .expect("Writing to String should never fail");
328 }
329 Color::Gray(gray) => {
330 writeln!(&mut self.operations, "{gray:.3} G")
331 .expect("Writing to String should never fail");
332 }
333 Color::Cmyk(c, m, y, k) => {
334 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} K")
335 .expect("Writing to String should never fail");
336 }
337 }
338 }
339 }
340
341 pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
342 Ok(self.operations.as_bytes().to_vec())
343 }
344
345 pub fn font_size(&self) -> f64 {
347 self.font_size
348 }
349
350 pub fn text_matrix(&self) -> [f64; 6] {
352 self.text_matrix
353 }
354
355 pub fn position(&self) -> (f64, f64) {
357 (self.text_matrix[4], self.text_matrix[5])
358 }
359
360 pub fn clear(&mut self) {
362 self.operations.clear();
363 self.character_spacing = None;
364 self.word_spacing = None;
365 self.horizontal_scaling = None;
366 self.leading = None;
367 self.text_rise = None;
368 self.rendering_mode = None;
369 self.fill_color = None;
370 self.stroke_color = None;
371 }
372
373 pub fn operations(&self) -> &str {
375 &self.operations
376 }
377
378 #[cfg(test)]
380 pub fn generate_text_state_operations(&self) -> String {
381 let mut ops = String::new();
382
383 if let Some(spacing) = self.character_spacing {
385 writeln!(&mut ops, "{spacing:.2} Tc").unwrap();
386 }
387
388 if let Some(spacing) = self.word_spacing {
390 writeln!(&mut ops, "{spacing:.2} Tw").unwrap();
391 }
392
393 if let Some(scale) = self.horizontal_scaling {
395 writeln!(&mut ops, "{:.2} Tz", scale * 100.0).unwrap();
396 }
397
398 if let Some(leading) = self.leading {
400 writeln!(&mut ops, "{leading:.2} TL").unwrap();
401 }
402
403 if let Some(rise) = self.text_rise {
405 writeln!(&mut ops, "{rise:.2} Ts").unwrap();
406 }
407
408 if let Some(mode) = self.rendering_mode {
410 writeln!(&mut ops, "{} Tr", mode as u8).unwrap();
411 }
412
413 ops
414 }
415}
416
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `configure` on a fresh context and returns the text-state
    /// operators reported by `generate_text_state_operations`.
    fn text_state_ops(configure: impl FnOnce(&mut TextContext)) -> String {
        let mut context = TextContext::new();
        configure(&mut context);
        context.generate_text_state_operations()
    }

    /// Text-state operators produced when only the rendering mode is set.
    fn rendering_mode_ops(mode: TextRenderingMode) -> String {
        text_state_ops(|context| {
            context.set_rendering_mode(mode);
        })
    }

    /// Applies the text-state parameters and returns a copy of the operators
    /// accumulated in `context`.
    fn apply_and_snapshot(context: &mut TextContext) -> String {
        context.apply_text_state_parameters();
        context.operations().to_owned()
    }

    // --- Construction and basic state -------------------------------------

    #[test]
    fn test_text_context_new() {
        let fresh = TextContext::new();
        assert_eq!(fresh.current_font, Font::Helvetica);
        assert_eq!(fresh.font_size, 12.0);
        assert_eq!(fresh.text_matrix, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
        assert!(fresh.operations.is_empty());
    }

    #[test]
    fn test_text_context_default() {
        let defaulted = TextContext::default();
        assert_eq!(defaulted.current_font, Font::Helvetica);
        assert_eq!(defaulted.font_size, 12.0);
    }

    #[test]
    fn test_set_font() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::TimesBold, 14.0);
        assert_eq!(ctx.current_font, Font::TimesBold);
        assert_eq!(ctx.font_size, 14.0);
    }

    #[test]
    fn test_position() {
        let mut ctx = TextContext::new();
        ctx.at(100.0, 200.0);

        // The accessor and the underlying matrix entries must agree.
        let (x, y) = ctx.position();
        assert_eq!(x, 100.0);
        assert_eq!(y, 200.0);
        assert_eq!(ctx.text_matrix[4], 100.0);
        assert_eq!(ctx.text_matrix[5], 200.0);
    }

    // --- Writing text -----------------------------------------------------

    #[test]
    fn test_write_simple_text() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();

        let emitted = ctx.operations();
        assert!(emitted.contains("BT\n"));
        assert!(emitted.contains("ET\n"));
        assert!(emitted.contains("/Helvetica 12 Tf"));
        assert!(emitted.contains("(Hello) Tj"));
    }

    #[test]
    fn test_write_text_with_escaping() {
        let mut ctx = TextContext::new();
        ctx.write("(Hello)").unwrap();

        // Parentheses inside the literal string must be backslash-escaped.
        assert!(ctx.operations().contains("(\\(Hello\\)) Tj"));
    }

    #[test]
    fn test_write_line() {
        let mut ctx = TextContext::new();
        let initial_y = ctx.text_matrix[5];
        ctx.write_line("Line 1").unwrap();
        let new_y = ctx.text_matrix[5];

        // The line advance is 1.2 times the default font size of 12.
        assert!(new_y < initial_y);
        assert_eq!(new_y, initial_y - 12.0 * 1.2);
    }

    // --- Individual text-state parameters ---------------------------------

    #[test]
    fn test_character_spacing() {
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(2.5);
        });
        assert!(ops.contains("2.50 Tc"));
    }

    #[test]
    fn test_word_spacing() {
        let ops = text_state_ops(|ctx| {
            ctx.set_word_spacing(1.5);
        });
        assert!(ops.contains("1.50 Tw"));
    }

    #[test]
    fn test_horizontal_scaling() {
        let ops = text_state_ops(|ctx| {
            ctx.set_horizontal_scaling(1.25);
        });
        // The factor is emitted as a percentage.
        assert!(ops.contains("125.00 Tz"));
    }

    #[test]
    fn test_leading() {
        let ops = text_state_ops(|ctx| {
            ctx.set_leading(15.0);
        });
        assert!(ops.contains("15.00 TL"));
    }

    #[test]
    fn test_text_rise() {
        let ops = text_state_ops(|ctx| {
            ctx.set_text_rise(3.0);
        });
        assert!(ops.contains("3.00 Ts"));
    }

    // --- Buffer handling and accessors ------------------------------------

    #[test]
    fn test_clear() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();
        assert!(!ctx.operations().is_empty());

        ctx.clear();
        assert!(ctx.operations().is_empty());
    }

    #[test]
    fn test_generate_operations() {
        let mut ctx = TextContext::new();
        ctx.write("Test").unwrap();

        let as_bytes = ctx.generate_operations().unwrap();
        let round_tripped = String::from_utf8(as_bytes).unwrap();
        assert_eq!(round_tripped, ctx.operations());
    }

    #[test]
    fn test_method_chaining() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::Courier, 10.0)
            .at(50.0, 100.0)
            .set_character_spacing(1.0)
            .set_word_spacing(2.0);

        assert_eq!(ctx.current_font(), &Font::Courier);
        assert_eq!(ctx.font_size(), 10.0);
        let (x, y) = ctx.position();
        assert_eq!(x, 50.0);
        assert_eq!(y, 100.0);
    }

    #[test]
    fn test_text_matrix_access() {
        let mut ctx = TextContext::new();
        ctx.at(25.0, 75.0);

        assert_eq!(ctx.text_matrix(), [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
    }

    #[test]
    fn test_special_characters_encoding() {
        let mut ctx = TextContext::new();
        ctx.write("Test\nLine\tTab").unwrap();

        // Newline and tab must appear as backslash escapes, not raw bytes.
        let emitted = ctx.operations();
        assert!(emitted.contains("\\n"));
        assert!(emitted.contains("\\t"));
    }

    // --- Rendering modes --------------------------------------------------

    #[test]
    fn test_rendering_mode_fill() {
        assert!(rendering_mode_ops(TextRenderingMode::Fill).contains("0 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::Stroke).contains("1 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStroke).contains("2 Tr"));
    }

    #[test]
    fn test_rendering_mode_invisible() {
        assert!(rendering_mode_ops(TextRenderingMode::Invisible).contains("3 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillClip).contains("4 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::StrokeClip).contains("5 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStrokeClip).contains("6 Tr"));
    }

    #[test]
    fn test_rendering_mode_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::Clip).contains("7 Tr"));
    }

    // --- Combined text state ----------------------------------------------

    #[test]
    fn test_text_state_parameters_chaining() {
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(1.5)
                .set_word_spacing(2.0)
                .set_horizontal_scaling(1.1)
                .set_leading(14.0)
                .set_text_rise(0.5)
                .set_rendering_mode(TextRenderingMode::FillStroke);
        });

        assert!(ops.contains("1.50 Tc"));
        assert!(ops.contains("2.00 Tw"));
        assert!(ops.contains("110.00 Tz"));
        assert!(ops.contains("14.00 TL"));
        assert!(ops.contains("0.50 Ts"));
        assert!(ops.contains("2 Tr"));
    }

    #[test]
    fn test_all_text_state_operators_generated() {
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(1.0);
            ctx.set_word_spacing(2.0);
            ctx.set_horizontal_scaling(1.2);
            ctx.set_leading(15.0);
            ctx.set_text_rise(1.0);
            ctx.set_rendering_mode(TextRenderingMode::Stroke);
        });

        let expectations = [
            ("Tc", "Character spacing operator (Tc) not found"),
            ("Tw", "Word spacing operator (Tw) not found"),
            ("Tz", "Horizontal scaling operator (Tz) not found"),
            ("TL", "Leading operator (TL) not found"),
            ("Ts", "Text rise operator (Ts) not found"),
            ("Tr", "Text rendering mode operator (Tr) not found"),
        ];
        for (operator, message) in expectations {
            assert!(ops.contains(operator), "{message}");
        }
    }

    // --- Colors -----------------------------------------------------------

    #[test]
    fn test_text_color_operations() {
        use crate::Color;

        let mut ctx = TextContext::new();

        // RGB fill color.
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        let ops = apply_and_snapshot(&mut ctx);
        assert!(
            ops.contains("1.000 0.000 0.000 rg"),
            "RGB fill color operator (rg) not found in: {ops}"
        );

        // RGB stroke color.
        ctx.clear();
        ctx.set_stroke_color(Color::rgb(0.0, 1.0, 0.0));
        let ops = apply_and_snapshot(&mut ctx);
        assert!(
            ops.contains("0.000 1.000 0.000 RG"),
            "RGB stroke color operator (RG) not found in: {ops}"
        );

        // Gray fill color.
        ctx.clear();
        ctx.set_fill_color(Color::gray(0.5));
        let ops = apply_and_snapshot(&mut ctx);
        assert!(
            ops.contains("0.500 g"),
            "Gray fill color operator (g) not found in: {ops}"
        );

        // CMYK stroke color.
        ctx.clear();
        ctx.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1));
        let ops = apply_and_snapshot(&mut ctx);
        assert!(
            ops.contains("0.200 0.300 0.400 0.100 K"),
            "CMYK stroke color operator (K) not found in: {ops}"
        );

        // Fill and stroke colors set at the same time.
        ctx.clear();
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        ctx.set_stroke_color(Color::rgb(0.0, 0.0, 1.0));
        let ops = apply_and_snapshot(&mut ctx);
        assert!(
            ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
            "Both fill and stroke colors not found in: {ops}"
        );
    }
}