oxidize_pdf/text/
mod.rs

1mod encoding;
2mod extraction;
3mod flow;
4mod font;
5mod metrics;
6
7pub use encoding::TextEncoding;
8pub use extraction::{ExtractedText, ExtractionOptions, TextExtractor, TextFragment};
9pub use flow::{TextAlign, TextFlowContext};
10pub use font::{Font, FontFamily};
11pub use metrics::{measure_char, measure_text, split_into_words};
12
13use crate::error::Result;
14use std::fmt::Write;
15
16#[derive(Clone)]
17pub struct TextContext {
18    operations: String,
19    current_font: Font,
20    font_size: f64,
21    text_matrix: [f64; 6],
22}
23
24impl Default for TextContext {
25    fn default() -> Self {
26        Self::new()
27    }
28}
29
30impl TextContext {
31    pub fn new() -> Self {
32        Self {
33            operations: String::new(),
34            current_font: Font::Helvetica,
35            font_size: 12.0,
36            text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
37        }
38    }
39
40    pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
41        self.current_font = font;
42        self.font_size = size;
43        self
44    }
45
46    pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
47        self.text_matrix[4] = x;
48        self.text_matrix[5] = y;
49        self
50    }
51
52    pub fn write(&mut self, text: &str) -> Result<&mut Self> {
53        // Begin text object
54        self.operations.push_str("BT\n");
55
56        // Set font
57        writeln!(
58            &mut self.operations,
59            "/{} {} Tf",
60            self.current_font.pdf_name(),
61            self.font_size
62        )
63        .unwrap();
64
65        // Set text position
66        writeln!(
67            &mut self.operations,
68            "{:.2} {:.2} Td",
69            self.text_matrix[4], self.text_matrix[5]
70        )
71        .unwrap();
72
73        // Encode text using WinAnsiEncoding
74        let encoding = TextEncoding::WinAnsiEncoding;
75        let encoded_bytes = encoding.encode(text);
76
77        // Show text as a literal string
78        self.operations.push('(');
79        for &byte in &encoded_bytes {
80            match byte {
81                b'(' => self.operations.push_str("\\("),
82                b')' => self.operations.push_str("\\)"),
83                b'\\' => self.operations.push_str("\\\\"),
84                b'\n' => self.operations.push_str("\\n"),
85                b'\r' => self.operations.push_str("\\r"),
86                b'\t' => self.operations.push_str("\\t"),
87                // For bytes in the printable ASCII range, write as is
88                0x20..=0x7E => self.operations.push(byte as char),
89                // For other bytes, write as octal escape sequences
90                _ => write!(&mut self.operations, "\\{byte:03o}").unwrap(),
91            }
92        }
93        self.operations.push_str(") Tj\n");
94
95        // End text object
96        self.operations.push_str("ET\n");
97
98        Ok(self)
99    }
100
101    pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
102        self.write(text)?;
103        self.text_matrix[5] -= self.font_size * 1.2; // Move down for next line
104        Ok(self)
105    }
106
107    pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
108        writeln!(&mut self.operations, "{spacing:.2} Tc").unwrap();
109        self
110    }
111
112    pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
113        writeln!(&mut self.operations, "{spacing:.2} Tw").unwrap();
114        self
115    }
116
117    pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
118        writeln!(&mut self.operations, "{:.2} Tz", scale * 100.0).unwrap();
119        self
120    }
121
122    pub fn set_leading(&mut self, leading: f64) -> &mut Self {
123        writeln!(&mut self.operations, "{leading:.2} TL").unwrap();
124        self
125    }
126
127    pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
128        writeln!(&mut self.operations, "{rise:.2} Ts").unwrap();
129        self
130    }
131
132    pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
133        Ok(self.operations.as_bytes().to_vec())
134    }
135}