1pub mod cmap;
2mod encoding;
3mod extraction;
4mod extraction_cmap;
5mod flow;
6mod font;
7pub mod font_manager;
8pub mod fonts;
9mod header_footer;
10mod layout;
11mod list;
12pub mod metrics;
13pub mod ocr;
14pub mod table;
15pub mod validation;
16
17#[cfg(test)]
18mod cmap_tests;
19
20#[cfg(feature = "ocr-tesseract")]
21pub mod tesseract_provider;
22
23pub use encoding::TextEncoding;
24pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
25pub use flow::{TextAlign, TextFlowContext};
26pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
27pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
28pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
29pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
30pub use list::{
31 BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
32 OrderedListStyle, UnorderedList,
33};
34pub use metrics::{measure_char, measure_text, split_into_words};
35pub use ocr::{
36 CharacterConfidence, CorrectionCandidate, CorrectionReason, CorrectionSuggestion,
37 CorrectionType, FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError,
38 OcrOptions, OcrPostProcessor, OcrProcessingResult, OcrProvider, OcrRegion, OcrResult,
39 OcrTextFragment, WordConfidence,
40};
41pub use table::{HeaderStyle, Table, TableCell, TableOptions};
42pub use validation::{MatchType, TextMatch, TextValidationResult, TextValidator};
43
44#[cfg(feature = "ocr-tesseract")]
45pub use tesseract_provider::{RustyTesseractConfig, RustyTesseractProvider};
46
47use crate::error::Result;
48use crate::Color;
49use std::fmt::Write;
50
/// Text rendering mode, written to the content stream as the operand of the
/// PDF `Tr` operator.
///
/// Each variant's discriminant is the integer that gets emitted (the enum is
/// cast with `as u8` when the operator is written).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TextRenderingMode {
    /// Fill the glyphs (emitted as `0 Tr`).
    Fill = 0,
    /// Stroke the glyph outlines (emitted as `1 Tr`).
    Stroke = 1,
    /// Fill and stroke the glyphs (emitted as `2 Tr`).
    FillStroke = 2,
    /// Neither fill nor stroke; the text is invisible (emitted as `3 Tr`).
    Invisible = 3,
    /// Fill the glyphs and use them for clipping (emitted as `4 Tr`).
    FillClip = 4,
    /// Stroke the glyphs and use them for clipping (emitted as `5 Tr`).
    StrokeClip = 5,
    /// Fill, stroke, and use the glyphs for clipping (emitted as `6 Tr`).
    FillStrokeClip = 6,
    /// Use the glyphs for clipping only (emitted as `7 Tr`).
    Clip = 7,
}
71
/// Accumulates PDF text operators as a string, together with the font,
/// position and optional text-state parameters used when text is written.
#[derive(Clone)]
pub struct TextContext {
    /// Operator text accumulated so far (appended to by `write` and friends).
    operations: String,
    /// Font used for the next `write`; set by `set_font`.
    current_font: Font,
    /// Font size used for the `Tf` operator and for the `write_line` advance.
    font_size: f64,
    /// Six-element matrix; elements 4 and 5 hold the current x and y position.
    text_matrix: [f64; 6],
    /// Position recorded by `at` and consumed (taken) by the next `write`.
    pending_position: Option<(f64, f64)>,
    /// Character spacing (`Tc`); nothing is emitted while `None`.
    character_spacing: Option<f64>,
    /// Word spacing (`Tw`); nothing is emitted while `None`.
    word_spacing: Option<f64>,
    /// Horizontal scaling as a ratio; multiplied by 100 when emitted as `Tz`.
    horizontal_scaling: Option<f64>,
    /// Leading (`TL`); nothing is emitted while `None`.
    leading: Option<f64>,
    /// Text rise (`Ts`); nothing is emitted while `None`.
    text_rise: Option<f64>,
    /// Rendering mode (`Tr`); nothing is emitted while `None`.
    rendering_mode: Option<TextRenderingMode>,
    /// Fill colour, emitted as `rg`, `g` or `k` depending on the colour variant.
    fill_color: Option<Color>,
    /// Stroke colour, emitted as `RG`, `G` or `K` depending on the colour variant.
    stroke_color: Option<Color>,
}
91
92impl Default for TextContext {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98impl TextContext {
99 pub fn new() -> Self {
100 Self {
101 operations: String::new(),
102 current_font: Font::Helvetica,
103 font_size: 12.0,
104 text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
105 pending_position: None,
106 character_spacing: None,
107 word_spacing: None,
108 horizontal_scaling: None,
109 leading: None,
110 text_rise: None,
111 rendering_mode: None,
112 fill_color: None,
113 stroke_color: None,
114 }
115 }
116
117 pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
118 self.current_font = font;
119 self.font_size = size;
120 self
121 }
122
123 #[allow(dead_code)]
125 pub(crate) fn current_font(&self) -> &Font {
126 &self.current_font
127 }
128
129 pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
130 self.text_matrix[4] = x;
132 self.text_matrix[5] = y;
133 self.pending_position = Some((x, y));
134 self
135 }
136
137 pub fn write(&mut self, text: &str) -> Result<&mut Self> {
138 self.operations.push_str("BT\n");
140
141 writeln!(
143 &mut self.operations,
144 "/{} {} Tf",
145 self.current_font.pdf_name(),
146 self.font_size
147 )
148 .expect("Writing to String should never fail");
149
150 self.apply_text_state_parameters();
152
153 let (x, y) = if let Some((px, py)) = self.pending_position.take() {
155 (px, py)
157 } else {
158 (self.text_matrix[4], self.text_matrix[5])
160 };
161
162 writeln!(&mut self.operations, "{:.2} {:.2} Td", x, y)
163 .expect("Writing to String should never fail");
164
165 match &self.current_font {
167 Font::Custom(_) => {
168 let utf16_units: Vec<u16> = text.encode_utf16().collect();
170 let mut utf16be_bytes = Vec::new();
171
172 for unit in utf16_units {
173 utf16be_bytes.push((unit >> 8) as u8); utf16be_bytes.push((unit & 0xFF) as u8); }
176
177 self.operations.push('<');
179 for &byte in &utf16be_bytes {
180 write!(&mut self.operations, "{:02X}", byte)
181 .expect("Writing to String should never fail");
182 }
183 self.operations.push_str("> Tj\n");
184 }
185 _ => {
186 let encoding = TextEncoding::WinAnsiEncoding;
188 let encoded_bytes = encoding.encode(text);
189
190 self.operations.push('(');
192 for &byte in &encoded_bytes {
193 match byte {
194 b'(' => self.operations.push_str("\\("),
195 b')' => self.operations.push_str("\\)"),
196 b'\\' => self.operations.push_str("\\\\"),
197 b'\n' => self.operations.push_str("\\n"),
198 b'\r' => self.operations.push_str("\\r"),
199 b'\t' => self.operations.push_str("\\t"),
200 0x20..=0x7E => self.operations.push(byte as char),
202 _ => write!(&mut self.operations, "\\{byte:03o}")
204 .expect("Writing to String should never fail"),
205 }
206 }
207 self.operations.push_str(") Tj\n");
208 }
209 }
210
211 self.operations.push_str("ET\n");
213
214 Ok(self)
215 }
216
217 pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
218 self.write(text)?;
219 self.text_matrix[5] -= self.font_size * 1.2; Ok(self)
221 }
222
223 pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
224 self.character_spacing = Some(spacing);
225 self
226 }
227
228 pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
229 self.word_spacing = Some(spacing);
230 self
231 }
232
233 pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
234 self.horizontal_scaling = Some(scale);
235 self
236 }
237
238 pub fn set_leading(&mut self, leading: f64) -> &mut Self {
239 self.leading = Some(leading);
240 self
241 }
242
243 pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
244 self.text_rise = Some(rise);
245 self
246 }
247
248 pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
250 self.rendering_mode = Some(mode);
251 self
252 }
253
254 pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
256 self.fill_color = Some(color);
257 self
258 }
259
260 pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
262 self.stroke_color = Some(color);
263 self
264 }
265
266 fn apply_text_state_parameters(&mut self) {
268 if let Some(spacing) = self.character_spacing {
270 writeln!(&mut self.operations, "{spacing:.2} Tc")
271 .expect("Writing to String should never fail");
272 }
273
274 if let Some(spacing) = self.word_spacing {
276 writeln!(&mut self.operations, "{spacing:.2} Tw")
277 .expect("Writing to String should never fail");
278 }
279
280 if let Some(scale) = self.horizontal_scaling {
282 writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0)
283 .expect("Writing to String should never fail");
284 }
285
286 if let Some(leading) = self.leading {
288 writeln!(&mut self.operations, "{leading:.2} TL")
289 .expect("Writing to String should never fail");
290 }
291
292 if let Some(rise) = self.text_rise {
294 writeln!(&mut self.operations, "{rise:.2} Ts")
295 .expect("Writing to String should never fail");
296 }
297
298 if let Some(mode) = self.rendering_mode {
300 writeln!(&mut self.operations, "{} Tr", mode as u8)
301 .expect("Writing to String should never fail");
302 }
303
304 if let Some(color) = self.fill_color {
306 match color {
307 Color::Rgb(r, g, b) => {
308 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} rg")
309 .expect("Writing to String should never fail");
310 }
311 Color::Gray(gray) => {
312 writeln!(&mut self.operations, "{gray:.3} g")
313 .expect("Writing to String should never fail");
314 }
315 Color::Cmyk(c, m, y, k) => {
316 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} k")
317 .expect("Writing to String should never fail");
318 }
319 }
320 }
321
322 if let Some(color) = self.stroke_color {
324 match color {
325 Color::Rgb(r, g, b) => {
326 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} RG")
327 .expect("Writing to String should never fail");
328 }
329 Color::Gray(gray) => {
330 writeln!(&mut self.operations, "{gray:.3} G")
331 .expect("Writing to String should never fail");
332 }
333 Color::Cmyk(c, m, y, k) => {
334 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} K")
335 .expect("Writing to String should never fail");
336 }
337 }
338 }
339 }
340
341 pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
342 Ok(self.operations.as_bytes().to_vec())
343 }
344
345 pub(crate) fn append_raw_operation(&mut self, operation: &str) {
350 self.operations.push_str(operation);
351 }
352
353 pub fn font_size(&self) -> f64 {
355 self.font_size
356 }
357
358 pub fn text_matrix(&self) -> [f64; 6] {
360 self.text_matrix
361 }
362
363 pub fn position(&self) -> (f64, f64) {
365 (self.text_matrix[4], self.text_matrix[5])
366 }
367
368 pub fn clear(&mut self) {
370 self.operations.clear();
371 self.character_spacing = None;
372 self.word_spacing = None;
373 self.horizontal_scaling = None;
374 self.leading = None;
375 self.text_rise = None;
376 self.rendering_mode = None;
377 self.fill_color = None;
378 self.stroke_color = None;
379 }
380
381 pub fn operations(&self) -> &str {
383 &self.operations
384 }
385
386 #[cfg(test)]
388 pub fn generate_text_state_operations(&self) -> String {
389 let mut ops = String::new();
390
391 if let Some(spacing) = self.character_spacing {
393 writeln!(&mut ops, "{spacing:.2} Tc").unwrap();
394 }
395
396 if let Some(spacing) = self.word_spacing {
398 writeln!(&mut ops, "{spacing:.2} Tw").unwrap();
399 }
400
401 if let Some(scale) = self.horizontal_scaling {
403 writeln!(&mut ops, "{:.2} Tz", scale * 100.0).unwrap();
404 }
405
406 if let Some(leading) = self.leading {
408 writeln!(&mut ops, "{leading:.2} TL").unwrap();
409 }
410
411 if let Some(rise) = self.text_rise {
413 writeln!(&mut ops, "{rise:.2} Ts").unwrap();
414 }
415
416 if let Some(mode) = self.rendering_mode {
418 writeln!(&mut ops, "{} Tr", mode as u8).unwrap();
419 }
420
421 ops
422 }
423}
424
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fresh context, lets `configure` adjust it, and returns the
    /// text-state operators that the context would emit.
    fn text_state_ops(configure: impl FnOnce(&mut TextContext)) -> String {
        let mut ctx = TextContext::new();
        configure(&mut ctx);
        ctx.generate_text_state_operations()
    }

    /// Text-state operators produced when only the rendering mode is set.
    fn rendering_mode_ops(mode: TextRenderingMode) -> String {
        text_state_ops(|ctx| {
            ctx.set_rendering_mode(mode);
        })
    }

    #[test]
    fn test_text_context_new() {
        let ctx = TextContext::new();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
        assert_eq!(ctx.text_matrix, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
        assert!(ctx.operations.is_empty());
    }

    #[test]
    fn test_text_context_default() {
        let ctx = TextContext::default();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
    }

    #[test]
    fn test_set_font() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::TimesBold, 14.0);
        assert_eq!(ctx.current_font, Font::TimesBold);
        assert_eq!(ctx.font_size, 14.0);
    }

    #[test]
    fn test_position() {
        let mut ctx = TextContext::new();
        ctx.at(100.0, 200.0);

        let (pos_x, pos_y) = ctx.position();
        assert_eq!(pos_x, 100.0);
        assert_eq!(pos_y, 200.0);
        assert_eq!(ctx.text_matrix[4], 100.0);
        assert_eq!(ctx.text_matrix[5], 200.0);
    }

    #[test]
    fn test_write_simple_text() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();

        let emitted = ctx.operations();
        assert!(emitted.contains("BT\n"));
        assert!(emitted.contains("ET\n"));
        assert!(emitted.contains("/Helvetica 12 Tf"));
        assert!(emitted.contains("(Hello) Tj"));
    }

    #[test]
    fn test_write_text_with_escaping() {
        let mut ctx = TextContext::new();
        ctx.write("(Hello)").unwrap();

        assert!(ctx.operations().contains("(\\(Hello\\)) Tj"));
    }

    #[test]
    fn test_write_line() {
        let mut ctx = TextContext::new();
        let y_before = ctx.text_matrix[5];
        ctx.write_line("Line 1").unwrap();
        let y_after = ctx.text_matrix[5];

        assert!(y_after < y_before);
        assert_eq!(y_after, y_before - 12.0 * 1.2);
    }

    #[test]
    fn test_character_spacing() {
        let emitted = text_state_ops(|ctx| {
            ctx.set_character_spacing(2.5);
        });
        assert!(emitted.contains("2.50 Tc"));
    }

    #[test]
    fn test_word_spacing() {
        let emitted = text_state_ops(|ctx| {
            ctx.set_word_spacing(1.5);
        });
        assert!(emitted.contains("1.50 Tw"));
    }

    #[test]
    fn test_horizontal_scaling() {
        let emitted = text_state_ops(|ctx| {
            ctx.set_horizontal_scaling(1.25);
        });
        assert!(emitted.contains("125.00 Tz"));
    }

    #[test]
    fn test_leading() {
        let emitted = text_state_ops(|ctx| {
            ctx.set_leading(15.0);
        });
        assert!(emitted.contains("15.00 TL"));
    }

    #[test]
    fn test_text_rise() {
        let emitted = text_state_ops(|ctx| {
            ctx.set_text_rise(3.0);
        });
        assert!(emitted.contains("3.00 Ts"));
    }

    #[test]
    fn test_clear() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();
        assert!(!ctx.operations().is_empty());

        ctx.clear();
        assert!(ctx.operations().is_empty());
    }

    #[test]
    fn test_generate_operations() {
        let mut ctx = TextContext::new();
        ctx.write("Test").unwrap();

        let raw = ctx.generate_operations().unwrap();
        let decoded = String::from_utf8(raw).unwrap();
        assert_eq!(decoded, ctx.operations());
    }

    #[test]
    fn test_method_chaining() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::Courier, 10.0)
            .at(50.0, 100.0)
            .set_character_spacing(1.0)
            .set_word_spacing(2.0);

        assert_eq!(ctx.current_font(), &Font::Courier);
        assert_eq!(ctx.font_size(), 10.0);

        let (pos_x, pos_y) = ctx.position();
        assert_eq!(pos_x, 50.0);
        assert_eq!(pos_y, 100.0);
    }

    #[test]
    fn test_text_matrix_access() {
        let mut ctx = TextContext::new();
        ctx.at(25.0, 75.0);

        assert_eq!(ctx.text_matrix(), [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
    }

    #[test]
    fn test_special_characters_encoding() {
        let mut ctx = TextContext::new();
        ctx.write("Test\nLine\tTab").unwrap();

        let emitted = ctx.operations();
        assert!(emitted.contains("\\n"));
        assert!(emitted.contains("\\t"));
    }

    #[test]
    fn test_rendering_mode_fill() {
        assert!(rendering_mode_ops(TextRenderingMode::Fill).contains("0 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::Stroke).contains("1 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStroke).contains("2 Tr"));
    }

    #[test]
    fn test_rendering_mode_invisible() {
        assert!(rendering_mode_ops(TextRenderingMode::Invisible).contains("3 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillClip).contains("4 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::StrokeClip).contains("5 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStrokeClip).contains("6 Tr"));
    }

    #[test]
    fn test_rendering_mode_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::Clip).contains("7 Tr"));
    }

    #[test]
    fn test_text_state_parameters_chaining() {
        let emitted = text_state_ops(|ctx| {
            ctx.set_character_spacing(1.5)
                .set_word_spacing(2.0)
                .set_horizontal_scaling(1.1)
                .set_leading(14.0)
                .set_text_rise(0.5)
                .set_rendering_mode(TextRenderingMode::FillStroke);
        });

        for expected in [
            "1.50 Tc",
            "2.00 Tw",
            "110.00 Tz",
            "14.00 TL",
            "0.50 Ts",
            "2 Tr",
        ] {
            assert!(emitted.contains(expected));
        }
    }

    #[test]
    fn test_all_text_state_operators_generated() {
        let mut ctx = TextContext::new();
        ctx.set_character_spacing(1.0);
        ctx.set_word_spacing(2.0);
        ctx.set_horizontal_scaling(1.2);
        ctx.set_leading(15.0);
        ctx.set_text_rise(1.0);
        ctx.set_rendering_mode(TextRenderingMode::Stroke);

        let ops = ctx.generate_text_state_operations();

        assert!(
            ops.contains("Tc"),
            "Character spacing operator (Tc) not found"
        );
        assert!(ops.contains("Tw"), "Word spacing operator (Tw) not found");
        assert!(
            ops.contains("Tz"),
            "Horizontal scaling operator (Tz) not found"
        );
        assert!(ops.contains("TL"), "Leading operator (TL) not found");
        assert!(ops.contains("Ts"), "Text rise operator (Ts) not found");
        assert!(
            ops.contains("Tr"),
            "Text rendering mode operator (Tr) not found"
        );
    }

    #[test]
    fn test_text_color_operations() {
        use crate::Color;

        let mut ctx = TextContext::new();

        // RGB fill colour.
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0))
            .apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("1.000 0.000 0.000 rg"),
                "RGB fill color operator (rg) not found in: {ops}"
            );
        }

        // RGB stroke colour.
        ctx.clear();
        ctx.set_stroke_color(Color::rgb(0.0, 1.0, 0.0))
            .apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("0.000 1.000 0.000 RG"),
                "RGB stroke color operator (RG) not found in: {ops}"
            );
        }

        // Gray fill colour.
        ctx.clear();
        ctx.set_fill_color(Color::gray(0.5))
            .apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("0.500 g"),
                "Gray fill color operator (g) not found in: {ops}"
            );
        }

        // CMYK stroke colour.
        ctx.clear();
        ctx.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1))
            .apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("0.200 0.300 0.400 0.100 K"),
                "CMYK stroke color operator (K) not found in: {ops}"
            );
        }

        // Fill and stroke colours together.
        ctx.clear();
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0))
            .set_stroke_color(Color::rgb(0.0, 0.0, 1.0))
            .apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
                "Both fill and stroke colors not found in: {ops}"
            );
        }
    }
}