oxidize_pdf/text/
mod.rs

1mod font;
2mod encoding;
3mod metrics;
4mod flow;
5mod extraction;
6
7pub use font::{Font, FontFamily};
8pub use encoding::TextEncoding;
9pub use metrics::{measure_text, measure_char, split_into_words};
10pub use flow::{TextFlowContext, TextAlign};
11pub use extraction::{TextExtractor, ExtractionOptions, ExtractedText, TextFragment};
12
13use crate::error::Result;
14use std::fmt::Write;
15
16#[derive(Clone)]
17pub struct TextContext {
18    operations: String,
19    current_font: Font,
20    font_size: f64,
21    text_matrix: [f64; 6],
22}
23
24impl TextContext {
25    pub fn new() -> Self {
26        Self {
27            operations: String::new(),
28            current_font: Font::Helvetica,
29            font_size: 12.0,
30            text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
31        }
32    }
33    
34    pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
35        self.current_font = font;
36        self.font_size = size;
37        self
38    }
39    
40    pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
41        self.text_matrix[4] = x;
42        self.text_matrix[5] = y;
43        self
44    }
45    
46    pub fn write(&mut self, text: &str) -> Result<&mut Self> {
47        // Begin text object
48        self.operations.push_str("BT\n");
49        
50        // Set font
51        write!(&mut self.operations, "/{} {} Tf\n", 
52               self.current_font.pdf_name(), self.font_size).unwrap();
53        
54        // Set text position
55        write!(&mut self.operations, "{:.2} {:.2} Td\n", 
56               self.text_matrix[4], self.text_matrix[5]).unwrap();
57        
58        // Encode text using WinAnsiEncoding
59        let encoding = TextEncoding::WinAnsiEncoding;
60        let encoded_bytes = encoding.encode(text);
61        
62        // Show text as a literal string
63        self.operations.push('(');
64        for &byte in &encoded_bytes {
65            match byte {
66                b'(' => self.operations.push_str("\\("),
67                b')' => self.operations.push_str("\\)"),
68                b'\\' => self.operations.push_str("\\\\"),
69                b'\n' => self.operations.push_str("\\n"),
70                b'\r' => self.operations.push_str("\\r"),
71                b'\t' => self.operations.push_str("\\t"),
72                // For bytes in the printable ASCII range, write as is
73                0x20..=0x7E => self.operations.push(byte as char),
74                // For other bytes, write as octal escape sequences
75                _ => write!(&mut self.operations, "\\{:03o}", byte).unwrap(),
76            }
77        }
78        self.operations.push_str(") Tj\n");
79        
80        // End text object
81        self.operations.push_str("ET\n");
82        
83        Ok(self)
84    }
85    
86    pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
87        self.write(text)?;
88        self.text_matrix[5] -= self.font_size * 1.2; // Move down for next line
89        Ok(self)
90    }
91    
92    pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
93        write!(&mut self.operations, "{:.2} Tc\n", spacing).unwrap();
94        self
95    }
96    
97    pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
98        write!(&mut self.operations, "{:.2} Tw\n", spacing).unwrap();
99        self
100    }
101    
102    pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
103        write!(&mut self.operations, "{:.2} Tz\n", scale * 100.0).unwrap();
104        self
105    }
106    
107    pub fn set_leading(&mut self, leading: f64) -> &mut Self {
108        write!(&mut self.operations, "{:.2} TL\n", leading).unwrap();
109        self
110    }
111    
112    pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
113        write!(&mut self.operations, "{:.2} Ts\n", rise).unwrap();
114        self
115    }
116    
117    pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
118        Ok(self.operations.as_bytes().to_vec())
119    }
120}