1pub mod cmap;
2mod encoding;
3mod extraction;
4mod extraction_cmap;
5mod flow;
6mod font;
7pub mod font_manager;
8pub mod fonts;
9mod header_footer;
10mod layout;
11mod list;
12pub mod metrics;
13pub mod ocr;
14pub mod table;
15pub mod validation;
16
17#[cfg(test)]
18mod cmap_tests;
19
20#[cfg(feature = "ocr-tesseract")]
21pub mod tesseract_provider;
22
23pub use encoding::TextEncoding;
24pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
25pub use flow::{TextAlign, TextFlowContext};
26pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
27pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
28pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
29pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
30pub use list::{
31 BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
32 OrderedListStyle, UnorderedList,
33};
34pub use metrics::{measure_char, measure_text, split_into_words};
35pub use ocr::{
36 CharacterConfidence, CorrectionCandidate, CorrectionReason, CorrectionSuggestion,
37 CorrectionType, FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError,
38 OcrOptions, OcrPostProcessor, OcrProcessingResult, OcrProvider, OcrRegion, OcrResult,
39 OcrTextFragment, WordConfidence,
40};
41pub use table::{HeaderStyle, Table, TableCell, TableOptions};
42pub use validation::{MatchType, TextMatch, TextValidationResult, TextValidator};
43
44#[cfg(feature = "ocr-tesseract")]
45pub use tesseract_provider::{RustyTesseractConfig, RustyTesseractProvider};
46
47use crate::error::Result;
48use crate::Color;
49use std::fmt::Write;
50
/// Text rendering mode selected with the PDF `Tr` operator.
///
/// Each variant's discriminant is the integer operand written in front of
/// `Tr` (the serializer casts the variant with `as u8`), so the explicit
/// values below must not be changed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextRenderingMode {
    /// Mode 0: fill the glyphs.
    Fill = 0,
    /// Mode 1: stroke the glyph outlines.
    Stroke = 1,
    /// Mode 2: fill, then stroke.
    FillStroke = 2,
    /// Mode 3: neither fill nor stroke (invisible text).
    Invisible = 3,
    /// Mode 4: fill and add the glyphs to the clipping path.
    FillClip = 4,
    /// Mode 5: stroke and add the glyphs to the clipping path.
    StrokeClip = 5,
    /// Mode 6: fill, stroke and add the glyphs to the clipping path.
    FillStrokeClip = 6,
    /// Mode 7: only add the glyphs to the clipping path.
    Clip = 7,
}
71
/// Accumulates PDF text operators as a string.
///
/// Setters record state; each `write` call appends one `BT … ET` text object
/// to `operations` using the state recorded at that moment.
#[derive(Clone)]
pub struct TextContext {
    /// Operator text appended so far (exposed via `operations()` and
    /// `generate_operations()`).
    operations: String,
    /// Font whose `pdf_name()` is written in front of the `Tf` operator.
    current_font: Font,
    /// Size operand of `Tf`; also drives the vertical advance of `write_line`.
    font_size: f64,
    /// Six-element matrix, initialised to identity. Within this file only
    /// elements 4 and 5 (the x/y position) are ever read or modified.
    text_matrix: [f64; 6],
    /// Position recorded by `at` and consumed (reset to `None`) by the next `write`.
    pending_position: Option<(f64, f64)>,
    /// Operand of `Tc`; emitted only when set.
    character_spacing: Option<f64>,
    /// Operand of `Tw`; emitted only when set.
    word_spacing: Option<f64>,
    /// Scale ratio; multiplied by 100 when emitted as the `Tz` operand.
    horizontal_scaling: Option<f64>,
    /// Operand of `TL`; emitted only when set.
    leading: Option<f64>,
    /// Operand of `Ts`; emitted only when set.
    text_rise: Option<f64>,
    /// Mode whose discriminant is emitted as the `Tr` operand.
    rendering_mode: Option<TextRenderingMode>,
    /// Fill color, emitted with `rg`, `g` or `k` depending on the color variant.
    fill_color: Option<Color>,
    /// Stroke color, emitted with `RG`, `G` or `K` depending on the color variant.
    stroke_color: Option<Color>,
}
91
92impl Default for TextContext {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98impl TextContext {
99 pub fn new() -> Self {
100 Self {
101 operations: String::new(),
102 current_font: Font::Helvetica,
103 font_size: 12.0,
104 text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
105 pending_position: None,
106 character_spacing: None,
107 word_spacing: None,
108 horizontal_scaling: None,
109 leading: None,
110 text_rise: None,
111 rendering_mode: None,
112 fill_color: None,
113 stroke_color: None,
114 }
115 }
116
117 pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
118 self.current_font = font;
119 self.font_size = size;
120 self
121 }
122
123 #[allow(dead_code)]
125 pub(crate) fn current_font(&self) -> &Font {
126 &self.current_font
127 }
128
129 pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
130 self.text_matrix[4] = x;
132 self.text_matrix[5] = y;
133 self.pending_position = Some((x, y));
134 self
135 }
136
137 pub fn write(&mut self, text: &str) -> Result<&mut Self> {
138 self.operations.push_str("BT\n");
140
141 writeln!(
143 &mut self.operations,
144 "/{} {} Tf",
145 self.current_font.pdf_name(),
146 self.font_size
147 )
148 .expect("Writing to String should never fail");
149
150 self.apply_text_state_parameters();
152
153 let (x, y) = if let Some((px, py)) = self.pending_position.take() {
155 (px, py)
157 } else {
158 (self.text_matrix[4], self.text_matrix[5])
160 };
161
162 writeln!(&mut self.operations, "{:.2} {:.2} Td", x, y)
163 .expect("Writing to String should never fail");
164
165 let encoding = TextEncoding::WinAnsiEncoding;
167 let encoded_bytes = encoding.encode(text);
168
169 self.operations.push('(');
171 for &byte in &encoded_bytes {
172 match byte {
173 b'(' => self.operations.push_str("\\("),
174 b')' => self.operations.push_str("\\)"),
175 b'\\' => self.operations.push_str("\\\\"),
176 b'\n' => self.operations.push_str("\\n"),
177 b'\r' => self.operations.push_str("\\r"),
178 b'\t' => self.operations.push_str("\\t"),
179 0x20..=0x7E => self.operations.push(byte as char),
181 _ => write!(&mut self.operations, "\\{byte:03o}")
183 .expect("Writing to String should never fail"),
184 }
185 }
186 self.operations.push_str(") Tj\n");
187
188 self.operations.push_str("ET\n");
190
191 Ok(self)
192 }
193
194 pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
195 self.write(text)?;
196 self.text_matrix[5] -= self.font_size * 1.2; Ok(self)
198 }
199
200 pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
201 self.character_spacing = Some(spacing);
202 self
203 }
204
205 pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
206 self.word_spacing = Some(spacing);
207 self
208 }
209
210 pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
211 self.horizontal_scaling = Some(scale);
212 self
213 }
214
215 pub fn set_leading(&mut self, leading: f64) -> &mut Self {
216 self.leading = Some(leading);
217 self
218 }
219
220 pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
221 self.text_rise = Some(rise);
222 self
223 }
224
225 pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
227 self.rendering_mode = Some(mode);
228 self
229 }
230
231 pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
233 self.fill_color = Some(color);
234 self
235 }
236
237 pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
239 self.stroke_color = Some(color);
240 self
241 }
242
243 fn apply_text_state_parameters(&mut self) {
245 if let Some(spacing) = self.character_spacing {
247 writeln!(&mut self.operations, "{spacing:.2} Tc")
248 .expect("Writing to String should never fail");
249 }
250
251 if let Some(spacing) = self.word_spacing {
253 writeln!(&mut self.operations, "{spacing:.2} Tw")
254 .expect("Writing to String should never fail");
255 }
256
257 if let Some(scale) = self.horizontal_scaling {
259 writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0)
260 .expect("Writing to String should never fail");
261 }
262
263 if let Some(leading) = self.leading {
265 writeln!(&mut self.operations, "{leading:.2} TL")
266 .expect("Writing to String should never fail");
267 }
268
269 if let Some(rise) = self.text_rise {
271 writeln!(&mut self.operations, "{rise:.2} Ts")
272 .expect("Writing to String should never fail");
273 }
274
275 if let Some(mode) = self.rendering_mode {
277 writeln!(&mut self.operations, "{} Tr", mode as u8)
278 .expect("Writing to String should never fail");
279 }
280
281 if let Some(color) = self.fill_color {
283 match color {
284 Color::Rgb(r, g, b) => {
285 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} rg")
286 .expect("Writing to String should never fail");
287 }
288 Color::Gray(gray) => {
289 writeln!(&mut self.operations, "{gray:.3} g")
290 .expect("Writing to String should never fail");
291 }
292 Color::Cmyk(c, m, y, k) => {
293 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} k")
294 .expect("Writing to String should never fail");
295 }
296 }
297 }
298
299 if let Some(color) = self.stroke_color {
301 match color {
302 Color::Rgb(r, g, b) => {
303 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} RG")
304 .expect("Writing to String should never fail");
305 }
306 Color::Gray(gray) => {
307 writeln!(&mut self.operations, "{gray:.3} G")
308 .expect("Writing to String should never fail");
309 }
310 Color::Cmyk(c, m, y, k) => {
311 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} K")
312 .expect("Writing to String should never fail");
313 }
314 }
315 }
316 }
317
318 pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
319 Ok(self.operations.as_bytes().to_vec())
320 }
321
322 pub fn font_size(&self) -> f64 {
324 self.font_size
325 }
326
327 pub fn text_matrix(&self) -> [f64; 6] {
329 self.text_matrix
330 }
331
332 pub fn position(&self) -> (f64, f64) {
334 (self.text_matrix[4], self.text_matrix[5])
335 }
336
337 pub fn clear(&mut self) {
339 self.operations.clear();
340 self.character_spacing = None;
341 self.word_spacing = None;
342 self.horizontal_scaling = None;
343 self.leading = None;
344 self.text_rise = None;
345 self.rendering_mode = None;
346 self.fill_color = None;
347 self.stroke_color = None;
348 }
349
350 pub fn operations(&self) -> &str {
352 &self.operations
353 }
354
355 #[cfg(test)]
357 pub fn generate_text_state_operations(&self) -> String {
358 let mut ops = String::new();
359
360 if let Some(spacing) = self.character_spacing {
362 writeln!(&mut ops, "{spacing:.2} Tc").unwrap();
363 }
364
365 if let Some(spacing) = self.word_spacing {
367 writeln!(&mut ops, "{spacing:.2} Tw").unwrap();
368 }
369
370 if let Some(scale) = self.horizontal_scaling {
372 writeln!(&mut ops, "{:.2} Tz", scale * 100.0).unwrap();
373 }
374
375 if let Some(leading) = self.leading {
377 writeln!(&mut ops, "{leading:.2} TL").unwrap();
378 }
379
380 if let Some(rise) = self.text_rise {
382 writeln!(&mut ops, "{rise:.2} Ts").unwrap();
383 }
384
385 if let Some(mode) = self.rendering_mode {
387 writeln!(&mut ops, "{} Tr", mode as u8).unwrap();
388 }
389
390 ops
391 }
392}
393
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fresh context with only `mode` set and returns its text state
    /// operators.
    fn ops_for_rendering_mode(mode: TextRenderingMode) -> String {
        let mut ctx = TextContext::new();
        ctx.set_rendering_mode(mode);
        ctx.generate_text_state_operations()
    }

    #[test]
    fn test_text_context_new() {
        let ctx = TextContext::new();

        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
        assert_eq!(ctx.text_matrix, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
        assert!(ctx.operations.is_empty());
    }

    #[test]
    fn test_text_context_default() {
        let ctx = TextContext::default();

        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
    }

    #[test]
    fn test_set_font() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::TimesBold, 14.0);

        assert_eq!(ctx.current_font, Font::TimesBold);
        assert_eq!(ctx.font_size, 14.0);
    }

    #[test]
    fn test_position() {
        let mut ctx = TextContext::new();
        ctx.at(100.0, 200.0);

        // The accessor and the raw matrix must agree.
        let (x, y) = ctx.position();
        assert_eq!(x, 100.0);
        assert_eq!(y, 200.0);
        assert_eq!(ctx.text_matrix[4], 100.0);
        assert_eq!(ctx.text_matrix[5], 200.0);
    }

    #[test]
    fn test_write_simple_text() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();

        let ops = ctx.operations();
        for expected in ["BT\n", "ET\n", "/Helvetica 12 Tf", "(Hello) Tj"] {
            assert!(ops.contains(expected));
        }
    }

    #[test]
    fn test_write_text_with_escaping() {
        let mut ctx = TextContext::new();
        ctx.write("(Hello)").unwrap();

        assert!(ctx.operations().contains("(\\(Hello\\)) Tj"));
    }

    #[test]
    fn test_write_line() {
        let mut ctx = TextContext::new();
        let y_before = ctx.text_matrix[5];

        ctx.write_line("Line 1").unwrap();

        let y_after = ctx.text_matrix[5];
        assert!(y_after < y_before);
        // Default font size 12.0 with the 1.2 line-advance factor.
        assert_eq!(y_after, y_before - 12.0 * 1.2);
    }

    #[test]
    fn test_character_spacing() {
        let mut ctx = TextContext::new();
        ctx.set_character_spacing(2.5);

        assert!(ctx.generate_text_state_operations().contains("2.50 Tc"));
    }

    #[test]
    fn test_word_spacing() {
        let mut ctx = TextContext::new();
        ctx.set_word_spacing(1.5);

        assert!(ctx.generate_text_state_operations().contains("1.50 Tw"));
    }

    #[test]
    fn test_horizontal_scaling() {
        let mut ctx = TextContext::new();
        ctx.set_horizontal_scaling(1.25);

        // The ratio is emitted as a percentage.
        assert!(ctx.generate_text_state_operations().contains("125.00 Tz"));
    }

    #[test]
    fn test_leading() {
        let mut ctx = TextContext::new();
        ctx.set_leading(15.0);

        assert!(ctx.generate_text_state_operations().contains("15.00 TL"));
    }

    #[test]
    fn test_text_rise() {
        let mut ctx = TextContext::new();
        ctx.set_text_rise(3.0);

        assert!(ctx.generate_text_state_operations().contains("3.00 Ts"));
    }

    #[test]
    fn test_clear() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();
        assert!(!ctx.operations().is_empty());

        ctx.clear();
        assert!(ctx.operations().is_empty());
    }

    #[test]
    fn test_generate_operations() {
        let mut ctx = TextContext::new();
        ctx.write("Test").unwrap();

        let bytes = ctx.generate_operations().unwrap();
        let as_text = String::from_utf8(bytes).unwrap();
        assert_eq!(as_text, ctx.operations());
    }

    #[test]
    fn test_method_chaining() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::Courier, 10.0)
            .at(50.0, 100.0)
            .set_character_spacing(1.0)
            .set_word_spacing(2.0);

        assert_eq!(ctx.current_font(), &Font::Courier);
        assert_eq!(ctx.font_size(), 10.0);
        let (x, y) = ctx.position();
        assert_eq!(x, 50.0);
        assert_eq!(y, 100.0);
    }

    #[test]
    fn test_text_matrix_access() {
        let mut ctx = TextContext::new();
        ctx.at(25.0, 75.0);

        assert_eq!(ctx.text_matrix(), [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
    }

    #[test]
    fn test_special_characters_encoding() {
        let mut ctx = TextContext::new();
        ctx.write("Test\nLine\tTab").unwrap();

        let ops = ctx.operations();
        assert!(ops.contains("\\n"));
        assert!(ops.contains("\\t"));
    }

    #[test]
    fn test_rendering_mode_fill() {
        assert!(ops_for_rendering_mode(TextRenderingMode::Fill).contains("0 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke() {
        assert!(ops_for_rendering_mode(TextRenderingMode::Stroke).contains("1 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke() {
        assert!(ops_for_rendering_mode(TextRenderingMode::FillStroke).contains("2 Tr"));
    }

    #[test]
    fn test_rendering_mode_invisible() {
        assert!(ops_for_rendering_mode(TextRenderingMode::Invisible).contains("3 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_clip() {
        assert!(ops_for_rendering_mode(TextRenderingMode::FillClip).contains("4 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke_clip() {
        assert!(ops_for_rendering_mode(TextRenderingMode::StrokeClip).contains("5 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke_clip() {
        assert!(ops_for_rendering_mode(TextRenderingMode::FillStrokeClip).contains("6 Tr"));
    }

    #[test]
    fn test_rendering_mode_clip() {
        assert!(ops_for_rendering_mode(TextRenderingMode::Clip).contains("7 Tr"));
    }

    #[test]
    fn test_text_state_parameters_chaining() {
        let mut ctx = TextContext::new();
        ctx.set_character_spacing(1.5)
            .set_word_spacing(2.0)
            .set_horizontal_scaling(1.1)
            .set_leading(14.0)
            .set_text_rise(0.5)
            .set_rendering_mode(TextRenderingMode::FillStroke);

        let ops = ctx.generate_text_state_operations();
        assert!(ops.contains("1.50 Tc"));
        assert!(ops.contains("2.00 Tw"));
        assert!(ops.contains("110.00 Tz"));
        assert!(ops.contains("14.00 TL"));
        assert!(ops.contains("0.50 Ts"));
        assert!(ops.contains("2 Tr"));
    }

    #[test]
    fn test_all_text_state_operators_generated() {
        let mut ctx = TextContext::new();
        ctx.set_character_spacing(1.0)
            .set_word_spacing(2.0)
            .set_horizontal_scaling(1.2)
            .set_leading(15.0)
            .set_text_rise(1.0)
            .set_rendering_mode(TextRenderingMode::Stroke);

        let ops = ctx.generate_text_state_operations();

        assert!(
            ops.contains("Tc"),
            "Character spacing operator (Tc) not found"
        );
        assert!(ops.contains("Tw"), "Word spacing operator (Tw) not found");
        assert!(
            ops.contains("Tz"),
            "Horizontal scaling operator (Tz) not found"
        );
        assert!(ops.contains("TL"), "Leading operator (TL) not found");
        assert!(ops.contains("Ts"), "Text rise operator (Ts) not found");
        assert!(
            ops.contains("Tr"),
            "Text rendering mode operator (Tr) not found"
        );
    }

    #[test]
    fn test_text_color_operations() {
        use crate::Color;

        let mut ctx = TextContext::new();

        // RGB fill color.
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("1.000 0.000 0.000 rg"),
            "RGB fill color operator (rg) not found in: {ops}"
        );

        // RGB stroke color.
        ctx.clear();
        ctx.set_stroke_color(Color::rgb(0.0, 1.0, 0.0));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("0.000 1.000 0.000 RG"),
            "RGB stroke color operator (RG) not found in: {ops}"
        );

        // Gray fill color.
        ctx.clear();
        ctx.set_fill_color(Color::gray(0.5));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("0.500 g"),
            "Gray fill color operator (g) not found in: {ops}"
        );

        // CMYK stroke color.
        ctx.clear();
        ctx.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("0.200 0.300 0.400 0.100 K"),
            "CMYK stroke color operator (K) not found in: {ops}"
        );

        // Fill and stroke colors set together.
        ctx.clear();
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        ctx.set_stroke_color(Color::rgb(0.0, 0.0, 1.0));
        ctx.apply_text_state_parameters();
        let ops = ctx.operations();
        assert!(
            ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
            "Both fill and stroke colors not found in: {ops}"
        );
    }
}