oxidize_pdf/text/
mod.rs

1pub mod cmap;
2mod encoding;
3mod extraction;
4mod extraction_cmap;
5mod flow;
6mod font;
7pub mod font_manager;
8pub mod fonts;
9mod header_footer;
10mod layout;
11mod list;
12mod metrics;
13pub mod ocr;
14pub mod table;
15
16#[cfg(test)]
17mod cmap_tests;
18
19#[cfg(feature = "ocr-tesseract")]
20pub mod tesseract_provider;
21
22pub use encoding::TextEncoding;
23pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
24pub use flow::{TextAlign, TextFlowContext};
25pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
26pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
27pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
28pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
29pub use list::{
30    BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
31    OrderedListStyle, UnorderedList,
32};
33pub use metrics::{measure_char, measure_text, split_into_words};
34pub use ocr::{
35    FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError, OcrOptions,
36    OcrProcessingResult, OcrProvider, OcrResult, OcrTextFragment,
37};
38pub use table::{HeaderStyle, Table, TableCell, TableOptions};
39
40use crate::error::Result;
41use crate::Color;
42use std::fmt::Write;
43
/// How glyphs are painted by subsequent text-showing operations.
///
/// Each variant's explicit discriminant is the integer that `TextContext`
/// writes in front of the `Tr` operator (the value is cast with `as u8`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextRenderingMode {
    /// Paint glyph interiors only; this is the mode used when none is set.
    Fill = 0,
    /// Paint glyph outlines only.
    Stroke = 1,
    /// Paint glyph interiors, then their outlines.
    FillStroke = 2,
    /// Paint nothing (useful for a searchable text layer placed over an image).
    Invisible = 3,
    /// Paint glyph interiors and also add the glyphs to the clipping path.
    FillClip = 4,
    /// Paint glyph outlines and also add the glyphs to the clipping path.
    StrokeClip = 5,
    /// Paint interiors and outlines and also add the glyphs to the clipping path.
    FillStrokeClip = 6,
    /// Only add the glyphs to the clipping path; nothing is painted.
    Clip = 7,
}
64
/// Accumulates PDF text operators together with the state used to produce them.
///
/// Every optional parameter starts as `None`; a `None` parameter emits no
/// operator when text is written.
#[derive(Clone)]
pub struct TextContext {
    /// Operator text generated so far (appended to by `write`).
    operations: String,
    /// Font selected for the next `Tf` operator.
    current_font: Font,
    /// Font size written with the next `Tf` operator.
    font_size: f64,
    /// Text matrix; within this file only elements 4 (x) and 5 (y) are
    /// modified and read, the rest keep their initial identity values.
    text_matrix: [f64; 6],
    // Text state parameters
    /// Character spacing, emitted as `Tc` when set.
    character_spacing: Option<f64>,
    /// Word spacing, emitted as `Tw` when set.
    word_spacing: Option<f64>,
    /// Horizontal scaling factor; multiplied by 100 and emitted as `Tz` when set.
    horizontal_scaling: Option<f64>,
    /// Leading, emitted as `TL` when set.
    leading: Option<f64>,
    /// Text rise, emitted as `Ts` when set.
    text_rise: Option<f64>,
    /// Rendering mode, emitted as `Tr` when set.
    rendering_mode: Option<TextRenderingMode>,
    // Color parameters
    /// Fill color, emitted as `rg`, `g` or `k` depending on the color variant.
    fill_color: Option<Color>,
    /// Stroke color, emitted as `RG`, `G` or `K` depending on the color variant.
    stroke_color: Option<Color>,
}
82
83impl Default for TextContext {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
89impl TextContext {
90    pub fn new() -> Self {
91        Self {
92            operations: String::new(),
93            current_font: Font::Helvetica,
94            font_size: 12.0,
95            text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
96            character_spacing: None,
97            word_spacing: None,
98            horizontal_scaling: None,
99            leading: None,
100            text_rise: None,
101            rendering_mode: None,
102            fill_color: None,
103            stroke_color: None,
104        }
105    }
106
107    pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
108        self.current_font = font;
109        self.font_size = size;
110        self
111    }
112
113    /// Get the current font
114    #[allow(dead_code)]
115    pub(crate) fn current_font(&self) -> &Font {
116        &self.current_font
117    }
118
119    pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
120        self.text_matrix[4] = x;
121        self.text_matrix[5] = y;
122        self
123    }
124
125    pub fn write(&mut self, text: &str) -> Result<&mut Self> {
126        // Begin text object
127        self.operations.push_str("BT\n");
128
129        // Set font
130        writeln!(
131            &mut self.operations,
132            "/{} {} Tf",
133            self.current_font.pdf_name(),
134            self.font_size
135        )
136        .expect("Writing to String should never fail");
137
138        // Apply text state parameters
139        self.apply_text_state_parameters();
140
141        // Set text position
142        writeln!(
143            &mut self.operations,
144            "{:.2} {:.2} Td",
145            self.text_matrix[4], self.text_matrix[5]
146        )
147        .expect("Writing to String should never fail");
148
149        // Encode text using WinAnsiEncoding
150        let encoding = TextEncoding::WinAnsiEncoding;
151        let encoded_bytes = encoding.encode(text);
152
153        // Show text as a literal string
154        self.operations.push('(');
155        for &byte in &encoded_bytes {
156            match byte {
157                b'(' => self.operations.push_str("\\("),
158                b')' => self.operations.push_str("\\)"),
159                b'\\' => self.operations.push_str("\\\\"),
160                b'\n' => self.operations.push_str("\\n"),
161                b'\r' => self.operations.push_str("\\r"),
162                b'\t' => self.operations.push_str("\\t"),
163                // For bytes in the printable ASCII range, write as is
164                0x20..=0x7E => self.operations.push(byte as char),
165                // For other bytes, write as octal escape sequences
166                _ => write!(&mut self.operations, "\\{byte:03o}")
167                    .expect("Writing to String should never fail"),
168            }
169        }
170        self.operations.push_str(") Tj\n");
171
172        // End text object
173        self.operations.push_str("ET\n");
174
175        Ok(self)
176    }
177
178    pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
179        self.write(text)?;
180        self.text_matrix[5] -= self.font_size * 1.2; // Move down for next line
181        Ok(self)
182    }
183
184    pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
185        self.character_spacing = Some(spacing);
186        self
187    }
188
189    pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
190        self.word_spacing = Some(spacing);
191        self
192    }
193
194    pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
195        self.horizontal_scaling = Some(scale);
196        self
197    }
198
199    pub fn set_leading(&mut self, leading: f64) -> &mut Self {
200        self.leading = Some(leading);
201        self
202    }
203
204    pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
205        self.text_rise = Some(rise);
206        self
207    }
208
209    /// Set the text rendering mode
210    pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
211        self.rendering_mode = Some(mode);
212        self
213    }
214
215    /// Set the text fill color
216    pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
217        self.fill_color = Some(color);
218        self
219    }
220
221    /// Set the text stroke color
222    pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
223        self.stroke_color = Some(color);
224        self
225    }
226
227    /// Apply text state parameters to the operations string
228    fn apply_text_state_parameters(&mut self) {
229        // Character spacing (Tc)
230        if let Some(spacing) = self.character_spacing {
231            writeln!(&mut self.operations, "{spacing:.2} Tc")
232                .expect("Writing to String should never fail");
233        }
234
235        // Word spacing (Tw)
236        if let Some(spacing) = self.word_spacing {
237            writeln!(&mut self.operations, "{spacing:.2} Tw")
238                .expect("Writing to String should never fail");
239        }
240
241        // Horizontal scaling (Tz)
242        if let Some(scale) = self.horizontal_scaling {
243            writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0)
244                .expect("Writing to String should never fail");
245        }
246
247        // Leading (TL)
248        if let Some(leading) = self.leading {
249            writeln!(&mut self.operations, "{leading:.2} TL")
250                .expect("Writing to String should never fail");
251        }
252
253        // Text rise (Ts)
254        if let Some(rise) = self.text_rise {
255            writeln!(&mut self.operations, "{rise:.2} Ts")
256                .expect("Writing to String should never fail");
257        }
258
259        // Text rendering mode (Tr)
260        if let Some(mode) = self.rendering_mode {
261            writeln!(&mut self.operations, "{} Tr", mode as u8)
262                .expect("Writing to String should never fail");
263        }
264
265        // Fill color
266        if let Some(color) = self.fill_color {
267            match color {
268                Color::Rgb(r, g, b) => {
269                    writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} rg")
270                        .expect("Writing to String should never fail");
271                }
272                Color::Gray(gray) => {
273                    writeln!(&mut self.operations, "{gray:.3} g")
274                        .expect("Writing to String should never fail");
275                }
276                Color::Cmyk(c, m, y, k) => {
277                    writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} k")
278                        .expect("Writing to String should never fail");
279                }
280            }
281        }
282
283        // Stroke color
284        if let Some(color) = self.stroke_color {
285            match color {
286                Color::Rgb(r, g, b) => {
287                    writeln!(&mut self.operations, "{r:.3} {g:.3} {b:.3} RG")
288                        .expect("Writing to String should never fail");
289                }
290                Color::Gray(gray) => {
291                    writeln!(&mut self.operations, "{gray:.3} G")
292                        .expect("Writing to String should never fail");
293                }
294                Color::Cmyk(c, m, y, k) => {
295                    writeln!(&mut self.operations, "{c:.3} {m:.3} {y:.3} {k:.3} K")
296                        .expect("Writing to String should never fail");
297                }
298            }
299        }
300    }
301
302    pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
303        Ok(self.operations.as_bytes().to_vec())
304    }
305
306    /// Get the current font size
307    pub fn font_size(&self) -> f64 {
308        self.font_size
309    }
310
311    /// Get the current text matrix
312    pub fn text_matrix(&self) -> [f64; 6] {
313        self.text_matrix
314    }
315
316    /// Get the current position
317    pub fn position(&self) -> (f64, f64) {
318        (self.text_matrix[4], self.text_matrix[5])
319    }
320
321    /// Clear all operations and reset text state parameters
322    pub fn clear(&mut self) {
323        self.operations.clear();
324        self.character_spacing = None;
325        self.word_spacing = None;
326        self.horizontal_scaling = None;
327        self.leading = None;
328        self.text_rise = None;
329        self.rendering_mode = None;
330        self.fill_color = None;
331        self.stroke_color = None;
332    }
333
334    /// Get the raw operations string
335    pub fn operations(&self) -> &str {
336        &self.operations
337    }
338
339    /// Generate text state operations for testing purposes
340    #[cfg(test)]
341    pub fn generate_text_state_operations(&self) -> String {
342        let mut ops = String::new();
343
344        // Character spacing (Tc)
345        if let Some(spacing) = self.character_spacing {
346            writeln!(&mut ops, "{spacing:.2} Tc").unwrap();
347        }
348
349        // Word spacing (Tw)
350        if let Some(spacing) = self.word_spacing {
351            writeln!(&mut ops, "{spacing:.2} Tw").unwrap();
352        }
353
354        // Horizontal scaling (Tz)
355        if let Some(scale) = self.horizontal_scaling {
356            writeln!(&mut ops, "{:.2} Tz", scale * 100.0).unwrap();
357        }
358
359        // Leading (TL)
360        if let Some(leading) = self.leading {
361            writeln!(&mut ops, "{leading:.2} TL").unwrap();
362        }
363
364        // Text rise (Ts)
365        if let Some(rise) = self.text_rise {
366            writeln!(&mut ops, "{rise:.2} Ts").unwrap();
367        }
368
369        // Text rendering mode (Tr)
370        if let Some(mode) = self.rendering_mode {
371            writeln!(&mut ops, "{} Tr", mode as u8).unwrap();
372        }
373
374        ops
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::*;

    /// Text matrix every freshly created context starts with.
    const IDENTITY_MATRIX: [f64; 6] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0];

    /// Creates a fresh context, lets `configure` adjust it and returns the
    /// text state operators the context reports.
    fn text_state_ops(configure: impl FnOnce(&mut TextContext)) -> String {
        let mut ctx = TextContext::new();
        configure(&mut ctx);
        ctx.generate_text_state_operations()
    }

    /// Text state operators reported after selecting `mode` on a fresh context.
    fn rendering_mode_ops(mode: TextRenderingMode) -> String {
        text_state_ops(|ctx| {
            ctx.set_rendering_mode(mode);
        })
    }

    #[test]
    fn test_text_context_new() {
        let ctx = TextContext::new();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
        assert_eq!(ctx.text_matrix, IDENTITY_MATRIX);
        assert!(ctx.operations.is_empty());
    }

    #[test]
    fn test_text_context_default() {
        let ctx = TextContext::default();
        assert_eq!(ctx.current_font, Font::Helvetica);
        assert_eq!(ctx.font_size, 12.0);
    }

    #[test]
    fn test_set_font() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::TimesBold, 14.0);
        assert_eq!(ctx.current_font, Font::TimesBold);
        assert_eq!(ctx.font_size, 14.0);
    }

    #[test]
    fn test_position() {
        let mut ctx = TextContext::new();
        ctx.at(100.0, 200.0);

        // Both the accessor and the underlying matrix must reflect the move.
        let (x, y) = ctx.position();
        assert_eq!(x, 100.0);
        assert_eq!(y, 200.0);
        assert_eq!(ctx.text_matrix[4], 100.0);
        assert_eq!(ctx.text_matrix[5], 200.0);
    }

    #[test]
    fn test_write_simple_text() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();

        let ops = ctx.operations();
        for expected in ["BT\n", "ET\n", "/Helvetica 12 Tf", "(Hello) Tj"] {
            assert!(ops.contains(expected));
        }
    }

    #[test]
    fn test_write_text_with_escaping() {
        let mut ctx = TextContext::new();
        ctx.write("(Hello)").unwrap();

        assert!(ctx.operations().contains("(\\(Hello\\)) Tj"));
    }

    #[test]
    fn test_write_line() {
        let mut ctx = TextContext::new();
        let y_before = ctx.text_matrix[5];
        ctx.write_line("Line 1").unwrap();
        let y_after = ctx.text_matrix[5];

        // Writing a line moves the cursor down by font_size * 1.2.
        assert!(y_after < y_before);
        assert_eq!(y_after, y_before - 12.0 * 1.2);
    }

    #[test]
    fn test_character_spacing() {
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(2.5);
        });
        assert!(ops.contains("2.50 Tc"));
    }

    #[test]
    fn test_word_spacing() {
        let ops = text_state_ops(|ctx| {
            ctx.set_word_spacing(1.5);
        });
        assert!(ops.contains("1.50 Tw"));
    }

    #[test]
    fn test_horizontal_scaling() {
        let ops = text_state_ops(|ctx| {
            ctx.set_horizontal_scaling(1.25);
        });
        // The factor 1.25 is reported as a percentage.
        assert!(ops.contains("125.00 Tz"));
    }

    #[test]
    fn test_leading() {
        let ops = text_state_ops(|ctx| {
            ctx.set_leading(15.0);
        });
        assert!(ops.contains("15.00 TL"));
    }

    #[test]
    fn test_text_rise() {
        let ops = text_state_ops(|ctx| {
            ctx.set_text_rise(3.0);
        });
        assert!(ops.contains("3.00 Ts"));
    }

    #[test]
    fn test_clear() {
        let mut ctx = TextContext::new();
        ctx.write("Hello").unwrap();
        assert!(!ctx.operations().is_empty());

        ctx.clear();
        assert!(ctx.operations().is_empty());
    }

    #[test]
    fn test_generate_operations() {
        let mut ctx = TextContext::new();
        ctx.write("Test").unwrap();

        // The byte form must round-trip to the same text as `operations()`.
        let as_bytes = ctx.generate_operations().unwrap();
        let as_text = String::from_utf8(as_bytes).unwrap();
        assert_eq!(as_text, ctx.operations());
    }

    #[test]
    fn test_method_chaining() {
        let mut ctx = TextContext::new();
        ctx.set_font(Font::Courier, 10.0)
            .at(50.0, 100.0)
            .set_character_spacing(1.0)
            .set_word_spacing(2.0);

        assert_eq!(ctx.current_font(), &Font::Courier);
        assert_eq!(ctx.font_size(), 10.0);
        let (x, y) = ctx.position();
        assert_eq!(x, 50.0);
        assert_eq!(y, 100.0);
    }

    #[test]
    fn test_text_matrix_access() {
        let mut ctx = TextContext::new();
        ctx.at(25.0, 75.0);

        assert_eq!(ctx.text_matrix(), [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
    }

    #[test]
    fn test_special_characters_encoding() {
        let mut ctx = TextContext::new();
        ctx.write("Test\nLine\tTab").unwrap();

        let ops = ctx.operations();
        assert!(ops.contains("\\n"));
        assert!(ops.contains("\\t"));
    }

    #[test]
    fn test_rendering_mode_fill() {
        assert!(rendering_mode_ops(TextRenderingMode::Fill).contains("0 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::Stroke).contains("1 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStroke).contains("2 Tr"));
    }

    #[test]
    fn test_rendering_mode_invisible() {
        assert!(rendering_mode_ops(TextRenderingMode::Invisible).contains("3 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillClip).contains("4 Tr"));
    }

    #[test]
    fn test_rendering_mode_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::StrokeClip).contains("5 Tr"));
    }

    #[test]
    fn test_rendering_mode_fill_stroke_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::FillStrokeClip).contains("6 Tr"));
    }

    #[test]
    fn test_rendering_mode_clip() {
        assert!(rendering_mode_ops(TextRenderingMode::Clip).contains("7 Tr"));
    }

    #[test]
    fn test_text_state_parameters_chaining() {
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(1.5)
                .set_word_spacing(2.0)
                .set_horizontal_scaling(1.1)
                .set_leading(14.0)
                .set_text_rise(0.5)
                .set_rendering_mode(TextRenderingMode::FillStroke);
        });

        let expected_lines = [
            "1.50 Tc",
            "2.00 Tw",
            "110.00 Tz",
            "14.00 TL",
            "0.50 Ts",
            "2 Tr",
        ];
        for expected in expected_lines {
            assert!(ops.contains(expected));
        }
    }

    #[test]
    fn test_all_text_state_operators_generated() {
        // Set every parameter individually, one operator each.
        let ops = text_state_ops(|ctx| {
            ctx.set_character_spacing(1.0); // Tc
            ctx.set_word_spacing(2.0); // Tw
            ctx.set_horizontal_scaling(1.2); // Tz
            ctx.set_leading(15.0); // TL
            ctx.set_text_rise(1.0); // Ts
            ctx.set_rendering_mode(TextRenderingMode::Stroke); // Tr
        });

        // Each operator paired with the failure message reported if it is missing.
        let expectations = [
            ("Tc", "Character spacing operator (Tc) not found"),
            ("Tw", "Word spacing operator (Tw) not found"),
            ("Tz", "Horizontal scaling operator (Tz) not found"),
            ("TL", "Leading operator (TL) not found"),
            ("Ts", "Text rise operator (Ts) not found"),
            ("Tr", "Text rendering mode operator (Tr) not found"),
        ];
        for (operator, message) in expectations {
            assert!(ops.contains(operator), "{message}");
        }
    }

    #[test]
    fn test_text_color_operations() {
        use crate::Color;

        let mut ctx = TextContext::new();

        // RGB fill color
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        ctx.apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("1.000 0.000 0.000 rg"),
                "RGB fill color operator (rg) not found in: {ops}"
            );
        }

        // RGB stroke color, starting from a cleared context
        ctx.clear();
        ctx.set_stroke_color(Color::rgb(0.0, 1.0, 0.0));
        ctx.apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("0.000 1.000 0.000 RG"),
                "RGB stroke color operator (RG) not found in: {ops}"
            );
        }

        // Grayscale fill color
        ctx.clear();
        ctx.set_fill_color(Color::gray(0.5));
        ctx.apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("0.500 g"),
                "Gray fill color operator (g) not found in: {ops}"
            );
        }

        // CMYK stroke color
        ctx.clear();
        ctx.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1));
        ctx.apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("0.200 0.300 0.400 0.100 K"),
                "CMYK stroke color operator (K) not found in: {ops}"
            );
        }

        // Fill and stroke colors set at the same time
        ctx.clear();
        ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
        ctx.set_stroke_color(Color::rgb(0.0, 0.0, 1.0));
        ctx.apply_text_state_parameters();
        {
            let ops = ctx.operations();
            assert!(
                ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
                "Both fill and stroke colors not found in: {ops}"
            );
        }
    }
}