1pub mod cmap;
2mod encoding;
3mod extraction;
4mod extraction_cmap;
5mod flow;
6mod font;
7pub mod font_manager;
8pub mod fonts;
9mod header_footer;
10mod layout;
11mod list;
12mod metrics;
13pub mod ocr;
14pub mod table;
15
16#[cfg(test)]
17mod cmap_tests;
18
19#[cfg(feature = "ocr-tesseract")]
20pub mod tesseract_provider;
21
22pub use encoding::TextEncoding;
23pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
24pub use flow::{TextAlign, TextFlowContext};
25pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
26pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
27pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
28pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
29pub use list::{
30 BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
31 OrderedListStyle, UnorderedList,
32};
33pub use metrics::{measure_char, measure_text, split_into_words};
34pub use ocr::{
35 FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError, OcrOptions,
36 OcrProcessingResult, OcrProvider, OcrResult, OcrTextFragment,
37};
38pub use table::{HeaderStyle, Table, TableCell, TableOptions};
39
40use crate::error::Result;
41use crate::Color;
42use std::fmt::Write;
43
/// Text rendering mode, written to the content stream as the operand of the `Tr` operator.
///
/// The explicit discriminants are the numeric operands that get emitted (`mode as u8`), so the
/// representation is pinned to `u8` and the values must not be renumbered.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum TextRenderingMode {
    /// Mode 0: fill the glyphs.
    Fill = 0,
    /// Mode 1: stroke the glyph outlines.
    Stroke = 1,
    /// Mode 2: fill, then stroke the glyphs.
    FillStroke = 2,
    /// Mode 3: neither fill nor stroke (invisible text).
    Invisible = 3,
    /// Mode 4: fill the glyphs and add them to the clipping path.
    FillClip = 4,
    /// Mode 5: stroke the glyphs and add them to the clipping path.
    StrokeClip = 5,
    /// Mode 6: fill, then stroke the glyphs and add them to the clipping path.
    FillStrokeClip = 6,
    /// Mode 7: only add the glyphs to the clipping path.
    Clip = 7,
}
64
/// Accumulates PDF text operators (`BT … ET` sections) as a string.
///
/// The font, position and optional text-state parameters stored here are emitted each time
/// text is written; the resulting operator text is exposed through `operations()` and
/// `generate_operations()`.
#[derive(Clone)]
pub struct TextContext {
    /// Operator text produced so far; appended to by `write` and emptied by `clear`.
    operations: String,
    /// Font whose `pdf_name()` is emitted with the `Tf` operator.
    current_font: Font,
    /// Font size emitted with the `Tf` operator; also drives the line advance in `write_line`.
    font_size: f64,
    /// Six-element matrix initialised to identity. Only elements 4 and 5 (the x/y position)
    /// are modified by this type; they are emitted with the `Td` operator.
    text_matrix: [f64; 6],
    /// Emitted as `Tc` when set.
    character_spacing: Option<f64>,
    /// Emitted as `Tw` when set.
    word_spacing: Option<f64>,
    /// Scale factor; multiplied by 100 and emitted as `Tz` when set.
    horizontal_scaling: Option<f64>,
    /// Emitted as `TL` when set.
    leading: Option<f64>,
    /// Emitted as `Ts` when set.
    text_rise: Option<f64>,
    /// Emitted as `Tr` (numeric discriminant) when set.
    rendering_mode: Option<TextRenderingMode>,
    /// Emitted as `g`, `rg` or `k` depending on the colour variant, when set.
    fill_color: Option<Color>,
    /// Emitted as `G`, `RG` or `K` depending on the colour variant, when set.
    stroke_color: Option<Color>,
}
82
83impl Default for TextContext {
84 fn default() -> Self {
85 Self::new()
86 }
87}
88
89impl TextContext {
90 pub fn new() -> Self {
91 Self {
92 operations: String::new(),
93 current_font: Font::Helvetica,
94 font_size: 12.0,
95 text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
96 character_spacing: None,
97 word_spacing: None,
98 horizontal_scaling: None,
99 leading: None,
100 text_rise: None,
101 rendering_mode: None,
102 fill_color: None,
103 stroke_color: None,
104 }
105 }
106
107 pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
108 self.current_font = font;
109 self.font_size = size;
110 self
111 }
112
113 #[allow(dead_code)]
115 pub(crate) fn current_font(&self) -> &Font {
116 &self.current_font
117 }
118
119 pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
120 self.text_matrix[4] = x;
121 self.text_matrix[5] = y;
122 self
123 }
124
125 pub fn write(&mut self, text: &str) -> Result<&mut Self> {
126 self.operations.push_str("BT\n");
128
129 writeln!(
131 &mut self.operations,
132 "/{} {} Tf",
133 self.current_font.pdf_name(),
134 self.font_size
135 )
136 .expect("Writing to String should never fail");
137
138 self.apply_text_state_parameters();
140
141 writeln!(
143 &mut self.operations,
144 "{:.2} {:.2} Td",
145 self.text_matrix[4], self.text_matrix[5]
146 )
147 .expect("Writing to String should never fail");
148
149 let encoding = TextEncoding::WinAnsiEncoding;
151 let encoded_bytes = encoding.encode(text);
152
153 self.operations.push('(');
155 for &byte in &encoded_bytes {
156 match byte {
157 b'(' => self.operations.push_str("\\("),
158 b')' => self.operations.push_str("\\)"),
159 b'\\' => self.operations.push_str("\\\\"),
160 b'\n' => self.operations.push_str("\\n"),
161 b'\r' => self.operations.push_str("\\r"),
162 b'\t' => self.operations.push_str("\\t"),
163 0x20..=0x7E => self.operations.push(byte as char),
165 _ => write!(&mut self.operations, "\\{byte:03o}")
167 .expect("Writing to String should never fail"),
168 }
169 }
170 self.operations.push_str(") Tj\n");
171
172 self.operations.push_str("ET\n");
174
175 Ok(self)
176 }
177
178 pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
179 self.write(text)?;
180 self.text_matrix[5] -= self.font_size * 1.2; Ok(self)
182 }
183
184 pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
185 self.character_spacing = Some(spacing);
186 self
187 }
188
189 pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
190 self.word_spacing = Some(spacing);
191 self
192 }
193
194 pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
195 self.horizontal_scaling = Some(scale);
196 self
197 }
198
199 pub fn set_leading(&mut self, leading: f64) -> &mut Self {
200 self.leading = Some(leading);
201 self
202 }
203
204 pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
205 self.text_rise = Some(rise);
206 self
207 }
208
209 pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
211 self.rendering_mode = Some(mode);
212 self
213 }
214
215 pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
217 self.fill_color = Some(color);
218 self
219 }
220
221 pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
223 self.stroke_color = Some(color);
224 self
225 }
226
227 fn apply_text_state_parameters(&mut self) {
229 if let Some(spacing) = self.character_spacing {
231 writeln!(&mut self.operations, "{spacing:.2} Tc")
232 .expect("Writing to String should never fail");
233 }
234
235 if let Some(spacing) = self.word_spacing {
237 writeln!(&mut self.operations, "{spacing:.2} Tw")
238 .expect("Writing to String should never fail");
239 }
240
241 if let Some(scale) = self.horizontal_scaling {
243 writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0)
244 .expect("Writing to String should never fail");
245 }
246
247 if let Some(leading) = self.leading {
249 writeln!(&mut self.operations, "{leading:.2} TL")
250 .expect("Writing to String should never fail");
251 }
252
253 if let Some(rise) = self.text_rise {
255 writeln!(&mut self.operations, "{rise:.2} Ts")
256 .expect("Writing to String should never fail");
257 }
258
259 if let Some(mode) = self.rendering_mode {
261 writeln!(&mut self.operations, "{} Tr", mode as u8)
262 .expect("Writing to String should never fail");
263 }
264
265 if let Some(color) = self.fill_color {
267 match color {
268 Color::Rgb(r, g, b) => {
269 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} rg")
270 .expect("Writing to String should never fail");
271 }
272 Color::Gray(gray) => {
273 writeln!(&mut self.operations, "{gray:.3} g")
274 .expect("Writing to String should never fail");
275 }
276 Color::Cmyk(c, m, y, k) => {
277 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} k")
278 .expect("Writing to String should never fail");
279 }
280 }
281 }
282
283 if let Some(color) = self.stroke_color {
285 match color {
286 Color::Rgb(r, g, b) => {
287 writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} RG")
288 .expect("Writing to String should never fail");
289 }
290 Color::Gray(gray) => {
291 writeln!(&mut self.operations, "{gray:.3} G")
292 .expect("Writing to String should never fail");
293 }
294 Color::Cmyk(c, m, y, k) => {
295 writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} K")
296 .expect("Writing to String should never fail");
297 }
298 }
299 }
300 }
301
302 pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
303 Ok(self.operations.as_bytes().to_vec())
304 }
305
306 pub fn font_size(&self) -> f64 {
308 self.font_size
309 }
310
311 pub fn text_matrix(&self) -> [f64; 6] {
313 self.text_matrix
314 }
315
316 pub fn position(&self) -> (f64, f64) {
318 (self.text_matrix[4], self.text_matrix[5])
319 }
320
321 pub fn clear(&mut self) {
323 self.operations.clear();
324 self.character_spacing = None;
325 self.word_spacing = None;
326 self.horizontal_scaling = None;
327 self.leading = None;
328 self.text_rise = None;
329 self.rendering_mode = None;
330 self.fill_color = None;
331 self.stroke_color = None;
332 }
333
334 pub fn operations(&self) -> &str {
336 &self.operations
337 }
338
339 #[cfg(test)]
341 pub fn generate_text_state_operations(&self) -> String {
342 let mut ops = String::new();
343
344 if let Some(spacing) = self.character_spacing {
346 writeln!(&mut ops, "{spacing:.2} Tc").unwrap();
347 }
348
349 if let Some(spacing) = self.word_spacing {
351 writeln!(&mut ops, "{spacing:.2} Tw").unwrap();
352 }
353
354 if let Some(scale) = self.horizontal_scaling {
356 writeln!(&mut ops, "{:.2} Tz", scale * 100.0).unwrap();
357 }
358
359 if let Some(leading) = self.leading {
361 writeln!(&mut ops, "{leading:.2} TL").unwrap();
362 }
363
364 if let Some(rise) = self.text_rise {
366 writeln!(&mut ops, "{rise:.2} Ts").unwrap();
367 }
368
369 if let Some(mode) = self.rendering_mode {
371 writeln!(&mut ops, "{} Tr", mode as u8).unwrap();
372 }
373
374 ops
375 }
376}
377
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fresh context, lets `configure` set parameters on it, and returns the
    /// text-state operators it would emit.
    fn text_state_ops(configure: impl FnOnce(&mut TextContext)) -> String {
        let mut context = TextContext::new();
        configure(&mut context);
        context.generate_text_state_operations()
    }

    /// Text-state operators emitted when only the rendering mode is set.
    fn rendering_mode_ops(mode: TextRenderingMode) -> String {
        text_state_ops(|context| {
            context.set_rendering_mode(mode);
        })
    }

    /// Applies the current text-state parameters and returns the whole operator buffer.
    fn applied_ops(context: &mut TextContext) -> &str {
        context.apply_text_state_parameters();
        context.operations()
    }

    #[test]
    fn test_text_context_new() {
        let fresh = TextContext::new();
        assert!(fresh.operations.is_empty());
        assert_eq!(fresh.current_font, Font::Helvetica);
        assert_eq!(fresh.font_size, 12.0);
        assert_eq!(fresh.text_matrix, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
    }

    #[test]
    fn test_text_context_default() {
        let by_default = TextContext::default();
        assert_eq!(by_default.font_size, 12.0);
        assert_eq!(by_default.current_font, Font::Helvetica);
    }

    #[test]
    fn test_set_font() {
        let mut context = TextContext::new();
        context.set_font(Font::TimesBold, 14.0);

        assert_eq!(context.font_size, 14.0);
        assert_eq!(context.current_font, Font::TimesBold);
    }

    #[test]
    fn test_position() {
        let mut context = TextContext::new();
        context.at(100.0, 200.0);

        assert_eq!(context.position(), (100.0, 200.0));
        assert_eq!(context.text_matrix[4], 100.0);
        assert_eq!(context.text_matrix[5], 200.0);
    }

    #[test]
    fn test_write_simple_text() {
        let mut context = TextContext::new();
        context.write("Hello").unwrap();

        let written = context.operations();
        assert!(written.contains("BT\n"));
        assert!(written.contains("/Helvetica 12 Tf"));
        assert!(written.contains("(Hello) Tj"));
        assert!(written.contains("ET\n"));
    }

    #[test]
    fn test_write_text_with_escaping() {
        let mut context = TextContext::new();
        context.write("(Hello)").unwrap();

        assert!(context.operations().contains("(\\(Hello\\)) Tj"));
    }

    #[test]
    fn test_write_line() {
        let mut context = TextContext::new();
        let y_before = context.text_matrix[5];

        context.write_line("Line 1").unwrap();
        let y_after = context.text_matrix[5];

        // Default font size is 12.0 and the line advance is 1.2 × the font size.
        assert!(y_after < y_before);
        assert_eq!(y_after, y_before - 12.0 * 1.2);
    }

    #[test]
    fn test_character_spacing() {
        let ops = text_state_ops(|context| {
            context.set_character_spacing(2.5);
        });
        assert!(ops.contains("2.50 Tc"));
    }

    #[test]
    fn test_word_spacing() {
        let ops = text_state_ops(|context| {
            context.set_word_spacing(1.5);
        });
        assert!(ops.contains("1.50 Tw"));
    }

    #[test]
    fn test_horizontal_scaling() {
        let ops = text_state_ops(|context| {
            context.set_horizontal_scaling(1.25);
        });
        // The factor is emitted as a percentage.
        assert!(ops.contains("125.00 Tz"));
    }

    #[test]
    fn test_leading() {
        let ops = text_state_ops(|context| {
            context.set_leading(15.0);
        });
        assert!(ops.contains("15.00 TL"));
    }

    #[test]
    fn test_text_rise() {
        let ops = text_state_ops(|context| {
            context.set_text_rise(3.0);
        });
        assert!(ops.contains("3.00 Ts"));
    }

    #[test]
    fn test_clear() {
        let mut context = TextContext::new();
        context.write("Hello").unwrap();
        assert!(!context.operations().is_empty());

        context.clear();
        assert!(context.operations().is_empty());
    }

    #[test]
    fn test_generate_operations() {
        let mut context = TextContext::new();
        context.write("Test").unwrap();

        let bytes = context.generate_operations().unwrap();
        let decoded = String::from_utf8(bytes).unwrap();
        assert_eq!(decoded, context.operations());
    }

    #[test]
    fn test_method_chaining() {
        let mut context = TextContext::new();
        context
            .set_font(Font::Courier, 10.0)
            .at(50.0, 100.0)
            .set_character_spacing(1.0)
            .set_word_spacing(2.0);

        assert_eq!(context.current_font(), &Font::Courier);
        assert_eq!(context.font_size(), 10.0);
        assert_eq!(context.position(), (50.0, 100.0));
    }

    #[test]
    fn test_text_matrix_access() {
        let mut context = TextContext::new();
        context.at(25.0, 75.0);

        assert_eq!(context.text_matrix(), [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
    }

    #[test]
    fn test_special_characters_encoding() {
        let mut context = TextContext::new();
        context.write("Test\nLine\tTab").unwrap();

        let written = context.operations();
        assert!(written.contains("\\n"));
        assert!(written.contains("\\t"));
    }

    #[test]
    fn test_rendering_mode_fill() {
        assert!(rendering_mode_ops(TextRenderingMode::Fill).contains("0 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::Stroke).contains("1 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStroke).contains("2 Tr"));
    }

    #[test]
    fn test_rendering_mode_invisible() {
        assert!(rendering_mode_ops(TextRenderingMode::Invisible).contains("3 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillClip).contains("4 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::StrokeClip).contains("5 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStrokeClip).contains("6 Tr"));
    }

    #[test]
    fn test_rendering_mode_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::Clip).contains("7 Tr"));
    }

    #[test]
    fn test_text_state_parameters_chaining() {
        let ops = text_state_ops(|context| {
            context
                .set_character_spacing(1.5)
                .set_word_spacing(2.0)
                .set_horizontal_scaling(1.1)
                .set_leading(14.0)
                .set_text_rise(0.5)
                .set_rendering_mode(TextRenderingMode::FillStroke);
        });

        assert!(ops.contains("1.50 Tc"));
        assert!(ops.contains("2.00 Tw"));
        assert!(ops.contains("110.00 Tz"));
        assert!(ops.contains("14.00 TL"));
        assert!(ops.contains("0.50 Ts"));
        assert!(ops.contains("2 Tr"));
    }

    #[test]
    fn test_all_text_state_operators_generated() {
        let ops = text_state_ops(|context| {
            context.set_character_spacing(1.0);
            context.set_word_spacing(2.0);
            context.set_horizontal_scaling(1.2);
            context.set_leading(15.0);
            context.set_text_rise(1.0);
            context.set_rendering_mode(TextRenderingMode::Stroke);
        });

        let expectations = [
            ("Tc", "Character spacing operator (Tc) not found"),
            ("Tw", "Word spacing operator (Tw) not found"),
            ("Tz", "Horizontal scaling operator (Tz) not found"),
            ("TL", "Leading operator (TL) not found"),
            ("Ts", "Text rise operator (Ts) not found"),
            ("Tr", "Text rendering mode operator (Tr) not found"),
        ];
        for &(operator, failure) in &expectations {
            assert!(ops.contains(operator), "{}", failure);
        }
    }

    #[test]
    fn test_text_color_operations() {
        use crate::Color;

        // One context is reused throughout; `clear()` must reset the colours between phases.
        let mut context = TextContext::new();

        // RGB fill.
        context.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        let ops = applied_ops(&mut context);
        assert!(
            ops.contains("1.000 0.000 0.000 rg"),
            "RGB fill color operator (rg) not found in: {ops}"
        );

        // RGB stroke.
        context.clear();
        context.set_stroke_color(Color::rgb(0.0, 1.0, 0.0));
        let ops = applied_ops(&mut context);
        assert!(
            ops.contains("0.000 1.000 0.000 RG"),
            "RGB stroke color operator (RG) not found in: {ops}"
        );

        // Gray fill.
        context.clear();
        context.set_fill_color(Color::gray(0.5));
        let ops = applied_ops(&mut context);
        assert!(
            ops.contains("0.500 g"),
            "Gray fill color operator (g) not found in: {ops}"
        );

        // CMYK stroke.
        context.clear();
        context.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1));
        let ops = applied_ops(&mut context);
        assert!(
            ops.contains("0.200 0.300 0.400 0.100 K"),
            "CMYK stroke color operator (K) not found in: {ops}"
        );

        // Fill and stroke together.
        context.clear();
        context.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        context.set_stroke_color(Color::rgb(0.0, 0.0, 1.0));
        let ops = applied_ops(&mut context);
        assert!(
            ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
            "Both fill and stroke colors not found in: {ops}"
        );
    }
}