Skip to main content

oxidize_pdf/text/
mod.rs

1pub mod cid_to_unicode;
2pub mod cmap;
3mod encoding;
4pub(crate) mod encoding_cmap;
5pub mod extraction;
6mod extraction_cmap;
7mod flow;
8mod font;
9pub mod font_manager;
10pub mod fonts;
11mod header_footer;
12pub mod invoice;
13mod layout;
14mod list;
15pub mod metrics;
16pub mod ocr;
17pub mod plaintext;
18pub mod structured;
19pub mod table;
20pub mod table_detection;
21pub mod text_block;
22pub mod validation;
23
24#[cfg(test)]
25mod cmap_tests;
26
27#[cfg(feature = "ocr-tesseract")]
28pub mod tesseract_provider;
29
30pub use encoding::{escape_pdf_string_literal, TextEncoding};
31pub use extraction::{
32    sanitize_extracted_text, ExtractedText, ExtractionOptions, TextExtractor, TextFragment,
33};
34pub use flow::{TextAlign, TextFlowContext};
35pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
36pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
37pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
38pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
39pub use list::{
40    BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
41    OrderedListStyle, UnorderedList,
42};
43pub use metrics::{
44    measure_char, measure_char_with, measure_text, measure_text_with, split_into_words,
45    FontMetricsStore,
46};
47pub use ocr::{
48    CharacterConfidence, CorrectionCandidate, CorrectionReason, CorrectionSuggestion,
49    CorrectionType, FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError,
50    OcrOptions, OcrPostProcessor, OcrProcessingResult, OcrProvider, OcrRegion, OcrResult,
51    OcrTextFragment, WordConfidence,
52};
53pub use plaintext::{LineBreakMode, PlainTextConfig, PlainTextExtractor, PlainTextResult};
54pub use table::{HeaderStyle, Table, TableCell, TableOptions};
55pub use text_block::{
56    compute_line_widths, measure_text_block, measure_text_block_with, TextBlockMetrics,
57};
58pub use validation::{MatchType, TextMatch, TextValidationResult, TextValidator};
59
60#[cfg(feature = "ocr-tesseract")]
61pub use tesseract_provider::{RustyTesseractConfig, RustyTesseractProvider};
62
63use crate::error::Result;
64use crate::Color;
65use std::collections::{HashMap, HashSet};
66
/// How glyphs are painted by subsequent show-text operations.
///
/// Each variant carries an explicit discriminant; `TextContext` casts the
/// variant to `u8` when it pushes the rendering-mode op.
///
/// Re-exported via `oxidize_pdf::text::TextRenderingMode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextRenderingMode {
    /// Fill the glyph outlines (default).
    Fill = 0,
    /// Stroke the glyph outlines.
    Stroke = 1,
    /// Fill, then stroke, the glyph outlines.
    FillStroke = 2,
    /// Paint nothing (for searchable text over images).
    Invisible = 3,
    /// Fill the glyphs and add them to the clipping path.
    FillClip = 4,
    /// Stroke the glyphs and add them to the clipping path.
    StrokeClip = 5,
    /// Fill and stroke the glyphs and add them to the clipping path.
    FillStrokeClip = 6,
    /// Add the glyphs to the clipping path without painting them.
    Clip = 7,
}
89
90/// Build the show-text IR op for `text` rendered with `font`. Single
91/// emission path shared by `TextContext::write` and
92/// `TextFlowContext::write_wrapped` so the two cannot diverge on encoding
93/// or escaping (issue #240 — pre-fix, the flow path emitted raw UTF-8
94/// bytes inside the literal `( … ) Tj` and any character outside ASCII
95/// rendered as Windows-1252 mojibake).
96///
97/// - `Font::Custom(_)` → UTF-16BE hex string per ISO 32000-1 §9.10.3,
98///   wrapped in `Op::ShowTextHex` so the writer emits `< … > Tj`.
99/// - Any builtin font → bytes are first WinAnsi-encoded
100///   ([`TextEncoding::WinAnsiEncoding`]) and then escaped for inclusion
101///   in a PDF string literal via
102///   [`encoding::escape_show_text_literal_bytes`].
103pub(crate) fn build_show_text_op(text: &str, font: &Font) -> crate::graphics::ops::Op {
104    use crate::graphics::ops::Op;
105
106    match font {
107        Font::Custom(_) => {
108            let utf16_units: Vec<u16> = text.encode_utf16().collect();
109            let mut hex = String::with_capacity(utf16_units.len() * 4);
110            for unit in utf16_units {
111                use std::fmt::Write as _;
112                write!(
113                    &mut hex,
114                    "{:02X}{:02X}",
115                    (unit >> 8) as u8,
116                    (unit & 0xFF) as u8
117                )
118                .expect("write to String never fails");
119            }
120            Op::ShowTextHex(hex.into_bytes())
121        }
122        _ => {
123            let encoded = TextEncoding::WinAnsiEncoding.encode(text);
124            Op::ShowText(encoding::escape_show_text_literal_bytes(&encoded))
125        }
126    }
127}
128
/// Accumulates text-drawing operations for a page as a typed list of
/// `Op` values, together with the font, position and text-state settings
/// that the next `write` call will emit.
#[derive(Clone)]
pub struct TextContext {
    // Typed operation buffer; serialised on demand by
    // `generate_operations` / `operations`.
    operations: Vec<crate::graphics::ops::Op>,
    // Font and size emitted by the next `write` (set via `set_font`).
    current_font: Font,
    font_size: f64,
    // Only the translation components ([4], [5]) are modified by this
    // type: `at` overwrites them and `write_line` moves [5] down.
    text_matrix: [f64; 6],
    // Pending position for next write operation
    pending_position: Option<(f64, f64)>,
    // Text state parameters; `None` means the corresponding operator is
    // not emitted at all.
    character_spacing: Option<f64>,
    word_spacing: Option<f64>,
    horizontal_scaling: Option<f64>,
    leading: Option<f64>,
    text_rise: Option<f64>,
    rendering_mode: Option<TextRenderingMode>,
    // Color parameters
    fill_color: Option<Color>,
    stroke_color: Option<Color>,
    // Track used characters per font PDF name (issue #204 — a single
    // global set caused every registered font to be subsetted with the
    // same characters, so two fonts of the same family ended up with
    // duplicated subsets). Extended by `write` for whichever font is
    // active: `record_used_chars` buckets builtin fonts as well as
    // `Font::Custom` ones, and leaves filtering down to the registered
    // custom fonts to the writer at subsetting time.
    used_characters_by_font: HashMap<String, HashSet<char>>,
    /// Per-document font metrics store threaded from `Page` (issue #230).
    /// `None` means the built-in heuristic width tables are used.
    /// Non-test callers arrive in Task 9-11 (Document integration).
    #[allow(dead_code)]
    pub(crate) font_metrics_store: Option<FontMetricsStore>,
}
160
161impl Default for TextContext {
162    fn default() -> Self {
163        Self::new()
164    }
165}
166
167impl TextContext {
168    pub fn new() -> Self {
169        Self {
170            operations: Vec::new(),
171            current_font: Font::Helvetica,
172            font_size: 12.0,
173            text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
174            pending_position: None,
175            character_spacing: None,
176            word_spacing: None,
177            horizontal_scaling: None,
178            leading: None,
179            text_rise: None,
180            rendering_mode: None,
181            fill_color: None,
182            stroke_color: None,
183            used_characters_by_font: HashMap::new(),
184            font_metrics_store: None,
185        }
186    }
187
188    /// Create a `TextContext` bound to a per-document `FontMetricsStore`
189    /// (issue #230). `None` is equivalent to `TextContext::new()`.
190    ///
191    /// `pub(crate)` — wired by `Page::*_with_metrics()` constructors and
192    /// by `Document::new_page_*()` factories.
193    pub(crate) fn with_metrics_store(store: Option<FontMetricsStore>) -> Self {
194        let mut ctx = Self::default();
195        ctx.font_metrics_store = store;
196        ctx
197    }
198
199    /// Inject or replace the per-Document `FontMetricsStore` on an
200    /// already-constructed context. Preserves accumulated ops and any
201    /// other state — only the `font_metrics_store` field is mutated.
202    ///
203    /// Called by `Document::add_page` for pages constructed via
204    /// `Page::a4()` / `Page::letter()` / `Page::new()` (those start with
205    /// `font_metrics_store: None` and may already carry ops the caller
206    /// pushed before transferring ownership to the Document).
207    pub(crate) fn set_metrics_store(&mut self, store: Option<FontMetricsStore>) {
208        self.font_metrics_store = store;
209    }
210
211    /// Record `text` as drawn with the currently-active font, bucketed
212    /// under the font's PDF name (issue #204). Builtin and custom fonts
213    /// are both tracked; the writer later filters to the set of
214    /// registered custom fonts when subsetting.
215    fn record_used_chars(&mut self, text: &str) {
216        let name = match &self.current_font {
217            Font::Custom(name) => name.clone(),
218            builtin => builtin.pdf_name(),
219        };
220        self.used_characters_by_font
221            .entry(name)
222            .or_default()
223            .extend(text.chars());
224    }
225
226    /// Introspection helper for Task 7 tests (issue #230).
227    #[cfg(test)]
228    pub(crate) fn font_metrics_store_for_test(&self) -> Option<&FontMetricsStore> {
229        self.font_metrics_store.as_ref()
230    }
231
232    /// Get the characters used in this text context (merged across all
233    /// fonts). Test-only compatibility accessor; callers that need
234    /// per-font accuracy for subsetting should use
235    /// [`TextContext::get_used_characters_by_font`] (issue #204).
236    #[cfg(test)]
237    pub(crate) fn get_used_characters(&self) -> Option<HashSet<char>> {
238        let merged: HashSet<char> = self
239            .used_characters_by_font
240            .values()
241            .flat_map(|s| s.iter().copied())
242            .collect();
243        if merged.is_empty() {
244            None
245        } else {
246            Some(merged)
247        }
248    }
249
250    /// Get the per-font character map for font subsetting (issue #204).
251    pub(crate) fn get_used_characters_by_font(&self) -> &HashMap<String, HashSet<char>> {
252        &self.used_characters_by_font
253    }
254
255    pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
256        self.current_font = font;
257        self.font_size = size;
258        self
259    }
260
261    /// Get the current font
262    #[allow(dead_code)]
263    pub(crate) fn current_font(&self) -> &Font {
264        &self.current_font
265    }
266
267    /// Current non-stroking (fill) colour, if one has been explicitly set.
268    /// Used by `Page::text_flow` to propagate the page-level text colour
269    /// into derived `TextFlowContext`s (issue #216).
270    pub(crate) fn fill_color(&self) -> Option<Color> {
271        self.fill_color
272    }
273
274    /// Accessors for the remaining text-state parameters (issue #222 —
275    /// Phase 6 of the v2.7.0 IR refactor). Used by `Page::text_flow` to
276    /// propagate the configured page-level state into derived
277    /// `TextFlowContext`s. Mirror of `fill_color()` above.
278    pub(crate) fn character_spacing(&self) -> Option<f64> {
279        self.character_spacing
280    }
281    pub(crate) fn word_spacing(&self) -> Option<f64> {
282        self.word_spacing
283    }
284    pub(crate) fn horizontal_scaling(&self) -> Option<f64> {
285        self.horizontal_scaling
286    }
287    pub(crate) fn leading(&self) -> Option<f64> {
288        self.leading
289    }
290    pub(crate) fn text_rise(&self) -> Option<f64> {
291        self.text_rise
292    }
293    pub(crate) fn rendering_mode(&self) -> Option<TextRenderingMode> {
294        self.rendering_mode
295    }
296    pub(crate) fn stroke_color(&self) -> Option<Color> {
297        self.stroke_color
298    }
299
300    pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
301        // Update text_matrix immediately and store for write() operation
302        self.text_matrix[4] = x;
303        self.text_matrix[5] = y;
304        self.pending_position = Some((x, y));
305        self
306    }
307
308    pub fn write(&mut self, text: &str) -> Result<&mut Self> {
309        use crate::graphics::ops::Op;
310
311        self.operations.push(Op::BeginText);
312
313        // Set font
314        self.operations.push(Op::SetFont {
315            name: self.current_font.pdf_name(),
316            size: self.font_size,
317        });
318
319        // Apply text state parameters (Tc/Tw/Tz/TL/Ts/Tr + colour)
320        self.apply_text_state_parameters();
321
322        // Set text position using pending_position if available, otherwise use text_matrix
323        let (x, y) = if let Some((px, py)) = self.pending_position.take() {
324            (px, py)
325        } else {
326            (self.text_matrix[4], self.text_matrix[5])
327        };
328        self.operations.push(Op::SetTextPosition { x, y });
329
330        // Shared encoding + escape pipeline (issue #240): builtin fonts
331        // route through WinAnsi + literal-string escape; Custom (CJK)
332        // fonts route through UTF-16BE hex. Mirror of the same call in
333        // `TextFlowContext::write_wrapped` — single source of truth.
334        self.operations
335            .push(build_show_text_op(text, &self.current_font));
336
337        // Track used characters for font subsetting bucketed by the
338        // active custom font (issue #204).
339        self.record_used_chars(text);
340
341        self.operations.push(Op::EndText);
342
343        Ok(self)
344    }
345
346    pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
347        self.write(text)?;
348        self.text_matrix[5] -= self.font_size * 1.2; // Move down for next line
349        Ok(self)
350    }
351
352    pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
353        self.character_spacing = Some(spacing);
354        self
355    }
356
357    pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
358        self.word_spacing = Some(spacing);
359        self
360    }
361
362    pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
363        self.horizontal_scaling = Some(scale);
364        self
365    }
366
367    pub fn set_leading(&mut self, leading: f64) -> &mut Self {
368        self.leading = Some(leading);
369        self
370    }
371
372    pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
373        self.text_rise = Some(rise);
374        self
375    }
376
377    /// Set the text rendering mode
378    pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
379        self.rendering_mode = Some(mode);
380        self
381    }
382
383    /// Set the text fill color
384    pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
385        self.fill_color = Some(color);
386        self
387    }
388
389    /// Set the text stroke color
390    pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
391        self.stroke_color = Some(color);
392        self
393    }
394
395    /// Apply text state parameters as `Op` values pushed into `self.operations`.
396    ///
397    /// All non-finite floats are clamped to `0.0` at serialisation time by
398    /// `serialize_ops` (issues #220 + #221 extend to non-colour emitters in
399    /// the v2.7.0 IR refactor).
400    fn apply_text_state_parameters(&mut self) {
401        use crate::graphics::ops::Op;
402
403        if let Some(spacing) = self.character_spacing {
404            self.operations.push(Op::SetCharSpacing(spacing));
405        }
406        if let Some(spacing) = self.word_spacing {
407            self.operations.push(Op::SetWordSpacing(spacing));
408        }
409        if let Some(scale) = self.horizontal_scaling {
410            // Tz operator takes a percentage. The setter accepts a 0.0–1.0
411            // ratio and the original implementation multiplied by 100 at
412            // emission; preserve that contract.
413            self.operations
414                .push(Op::SetHorizontalScaling(scale * 100.0));
415        }
416        if let Some(leading) = self.leading {
417            self.operations.push(Op::SetLeading(leading));
418        }
419        if let Some(rise) = self.text_rise {
420            self.operations.push(Op::SetTextRise(rise));
421        }
422        if let Some(mode) = self.rendering_mode {
423            self.operations.push(Op::SetRenderingMode(mode as u8));
424        }
425
426        // Fill / stroke colour delegates to the IR variants which in turn
427        // delegate to `write_fill_color_bytes` / `write_stroke_color_bytes`
428        // (issues #220 + #221).
429        if let Some(color) = self.fill_color {
430            self.operations.push(Op::SetFillColor(color));
431        }
432        if let Some(color) = self.stroke_color {
433            self.operations.push(Op::SetStrokeColor(color));
434        }
435    }
436
437    pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
438        let mut buf = Vec::new();
439        crate::graphics::ops::serialize_ops(&mut buf, &self.operations);
440        Ok(buf)
441    }
442
443    /// Take ownership of the accumulated `Op` buffer, leaving an empty
444    /// `Vec` in its place. Mirror of `GraphicsContext::drain_ops` —
445    /// used by `Page` to flush the text buffer into a unified content
446    /// stream on context switch (issue #227).
447    pub(crate) fn drain_ops(&mut self) -> Vec<crate::graphics::ops::Op> {
448        std::mem::take(&mut self.operations)
449    }
450
451    /// Read-only access to the operation list.
452    pub(crate) fn ops_slice(&self) -> &[crate::graphics::ops::Op] {
453        &self.operations
454    }
455
456    /// Appends a raw PDF operation to the text context
457    ///
458    /// This is used internally for marked content operators (BDC/EMC) and other
459    /// low-level PDF operations that need to be interleaved with text operations.
460    pub(crate) fn append_raw_operation(&mut self, operation: &str) {
461        self.operations
462            .push(crate::graphics::ops::Op::Raw(operation.as_bytes().to_vec()));
463    }
464
465    /// Get the current font size
466    pub fn font_size(&self) -> f64 {
467        self.font_size
468    }
469
470    /// Get the current text matrix
471    pub fn text_matrix(&self) -> [f64; 6] {
472        self.text_matrix
473    }
474
475    /// Get the current position
476    pub fn position(&self) -> (f64, f64) {
477        (self.text_matrix[4], self.text_matrix[5])
478    }
479
480    /// Clear all operations and reset text state parameters
481    pub fn clear(&mut self) {
482        self.operations.clear();
483        self.character_spacing = None;
484        self.word_spacing = None;
485        self.horizontal_scaling = None;
486        self.leading = None;
487        self.text_rise = None;
488        self.rendering_mode = None;
489        self.fill_color = None;
490        self.stroke_color = None;
491    }
492
493    /// Get the operations as a serialised PDF content-stream `String`.
494    ///
495    /// Pre-2.7.0 this returned `&str`. The IR migration replaced the
496    /// internal `String` buffer with a typed `Vec<Op>`, so the legacy
497    /// borrow is materialised on demand. Internal callers prefer
498    /// `generate_operations()` which returns the byte buffer directly.
499    pub fn operations(&self) -> String {
500        crate::graphics::ops::ops_to_string(&self.operations)
501    }
502
503    /// Generate text state operations for testing purposes.
504    /// Routes through the IR so the same sanitisation applies.
505    #[cfg(test)]
506    pub fn generate_text_state_operations(&self) -> String {
507        use crate::graphics::ops::{ops_to_string, Op};
508
509        let mut ops = Vec::new();
510        if let Some(spacing) = self.character_spacing {
511            ops.push(Op::SetCharSpacing(spacing));
512        }
513        if let Some(spacing) = self.word_spacing {
514            ops.push(Op::SetWordSpacing(spacing));
515        }
516        if let Some(scale) = self.horizontal_scaling {
517            ops.push(Op::SetHorizontalScaling(scale * 100.0));
518        }
519        if let Some(leading) = self.leading {
520            ops.push(Op::SetLeading(leading));
521        }
522        if let Some(rise) = self.text_rise {
523            ops.push(Op::SetTextRise(rise));
524        }
525        if let Some(mode) = self.rendering_mode {
526            ops.push(Op::SetRenderingMode(mode as u8));
527        }
528        ops_to_string(&ops)
529    }
530}
531
532#[cfg(test)]
533mod tests {
534    use super::*;
535
536    #[test]
537    fn test_text_context_new() {
538        let context = TextContext::new();
539        assert_eq!(context.current_font, Font::Helvetica);
540        assert_eq!(context.font_size, 12.0);
541        assert_eq!(context.text_matrix, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
542        assert!(context.operations.is_empty());
543    }
544
545    #[test]
546    fn test_text_context_default() {
547        let context = TextContext::default();
548        assert_eq!(context.current_font, Font::Helvetica);
549        assert_eq!(context.font_size, 12.0);
550    }
551
552    #[test]
553    fn test_set_font() {
554        let mut context = TextContext::new();
555        context.set_font(Font::TimesBold, 14.0);
556        assert_eq!(context.current_font, Font::TimesBold);
557        assert_eq!(context.font_size, 14.0);
558    }
559
560    #[test]
561    fn test_position() {
562        let mut context = TextContext::new();
563        context.at(100.0, 200.0);
564        let (x, y) = context.position();
565        assert_eq!(x, 100.0);
566        assert_eq!(y, 200.0);
567        assert_eq!(context.text_matrix[4], 100.0);
568        assert_eq!(context.text_matrix[5], 200.0);
569    }
570
571    #[test]
572    fn test_write_simple_text() {
573        let mut context = TextContext::new();
574        context.write("Hello").unwrap();
575
576        let ops = context.operations();
577        assert!(ops.contains("BT\n"));
578        assert!(ops.contains("ET\n"));
579        assert!(ops.contains("/Helvetica 12 Tf"));
580        assert!(ops.contains("(Hello) Tj"));
581    }
582
583    #[test]
584    fn test_write_text_with_escaping() {
585        let mut context = TextContext::new();
586        context.write("(Hello)").unwrap();
587
588        let ops = context.operations();
589        assert!(ops.contains("(\\(Hello\\)) Tj"));
590    }
591
592    #[test]
593    fn test_write_line() {
594        let mut context = TextContext::new();
595        let initial_y = context.text_matrix[5];
596        context.write_line("Line 1").unwrap();
597
598        // Y position should have moved down
599        let new_y = context.text_matrix[5];
600        assert!(new_y < initial_y);
601        assert_eq!(new_y, initial_y - 12.0 * 1.2); // font_size * 1.2
602    }
603
604    #[test]
605    fn test_character_spacing() {
606        let mut context = TextContext::new();
607        context.set_character_spacing(2.5);
608
609        let ops = context.generate_text_state_operations();
610        assert!(ops.contains("2.50 Tc"));
611    }
612
613    #[test]
614    fn test_word_spacing() {
615        let mut context = TextContext::new();
616        context.set_word_spacing(1.5);
617
618        let ops = context.generate_text_state_operations();
619        assert!(ops.contains("1.50 Tw"));
620    }
621
622    #[test]
623    fn test_horizontal_scaling() {
624        let mut context = TextContext::new();
625        context.set_horizontal_scaling(1.25);
626
627        let ops = context.generate_text_state_operations();
628        assert!(ops.contains("125.00 Tz")); // 1.25 * 100
629    }
630
631    #[test]
632    fn test_leading() {
633        let mut context = TextContext::new();
634        context.set_leading(15.0);
635
636        let ops = context.generate_text_state_operations();
637        assert!(ops.contains("15.00 TL"));
638    }
639
640    #[test]
641    fn test_text_rise() {
642        let mut context = TextContext::new();
643        context.set_text_rise(3.0);
644
645        let ops = context.generate_text_state_operations();
646        assert!(ops.contains("3.00 Ts"));
647    }
648
649    #[test]
650    fn test_clear() {
651        let mut context = TextContext::new();
652        context.write("Hello").unwrap();
653        assert!(!context.operations().is_empty());
654
655        context.clear();
656        assert!(context.operations().is_empty());
657    }
658
659    #[test]
660    fn test_generate_operations() {
661        let mut context = TextContext::new();
662        context.write("Test").unwrap();
663
664        let ops_bytes = context.generate_operations().unwrap();
665        let ops_string = String::from_utf8(ops_bytes).unwrap();
666        assert_eq!(ops_string, context.operations());
667    }
668
669    #[test]
670    fn test_method_chaining() {
671        let mut context = TextContext::new();
672        context
673            .set_font(Font::Courier, 10.0)
674            .at(50.0, 100.0)
675            .set_character_spacing(1.0)
676            .set_word_spacing(2.0);
677
678        assert_eq!(context.current_font(), &Font::Courier);
679        assert_eq!(context.font_size(), 10.0);
680        let (x, y) = context.position();
681        assert_eq!(x, 50.0);
682        assert_eq!(y, 100.0);
683    }
684
685    #[test]
686    fn test_text_matrix_access() {
687        let mut context = TextContext::new();
688        context.at(25.0, 75.0);
689
690        let matrix = context.text_matrix();
691        assert_eq!(matrix, [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
692    }
693
694    #[test]
695    fn test_special_characters_encoding() {
696        let mut context = TextContext::new();
697        context.write("Test\nLine\tTab").unwrap();
698
699        let ops = context.operations();
700        assert!(ops.contains("\\n"));
701        assert!(ops.contains("\\t"));
702    }
703
704    #[test]
705    fn test_rendering_mode_fill() {
706        let mut context = TextContext::new();
707        context.set_rendering_mode(TextRenderingMode::Fill);
708
709        let ops = context.generate_text_state_operations();
710        assert!(ops.contains("0 Tr"));
711    }
712
713    #[test]
714    fn test_rendering_mode_stroke() {
715        let mut context = TextContext::new();
716        context.set_rendering_mode(TextRenderingMode::Stroke);
717
718        let ops = context.generate_text_state_operations();
719        assert!(ops.contains("1 Tr"));
720    }
721
722    #[test]
723    fn test_rendering_mode_fill_stroke() {
724        let mut context = TextContext::new();
725        context.set_rendering_mode(TextRenderingMode::FillStroke);
726
727        let ops = context.generate_text_state_operations();
728        assert!(ops.contains("2 Tr"));
729    }
730
731    #[test]
732    fn test_rendering_mode_invisible() {
733        let mut context = TextContext::new();
734        context.set_rendering_mode(TextRenderingMode::Invisible);
735
736        let ops = context.generate_text_state_operations();
737        assert!(ops.contains("3 Tr"));
738    }
739
740    #[test]
741    fn test_rendering_mode_fill_clip() {
742        let mut context = TextContext::new();
743        context.set_rendering_mode(TextRenderingMode::FillClip);
744
745        let ops = context.generate_text_state_operations();
746        assert!(ops.contains("4 Tr"));
747    }
748
749    #[test]
750    fn test_rendering_mode_stroke_clip() {
751        let mut context = TextContext::new();
752        context.set_rendering_mode(TextRenderingMode::StrokeClip);
753
754        let ops = context.generate_text_state_operations();
755        assert!(ops.contains("5 Tr"));
756    }
757
758    #[test]
759    fn test_rendering_mode_fill_stroke_clip() {
760        let mut context = TextContext::new();
761        context.set_rendering_mode(TextRenderingMode::FillStrokeClip);
762
763        let ops = context.generate_text_state_operations();
764        assert!(ops.contains("6 Tr"));
765    }
766
767    #[test]
768    fn test_rendering_mode_clip() {
769        let mut context = TextContext::new();
770        context.set_rendering_mode(TextRenderingMode::Clip);
771
772        let ops = context.generate_text_state_operations();
773        assert!(ops.contains("7 Tr"));
774    }
775
776    #[test]
777    fn test_text_state_parameters_chaining() {
778        let mut context = TextContext::new();
779        context
780            .set_character_spacing(1.5)
781            .set_word_spacing(2.0)
782            .set_horizontal_scaling(1.1)
783            .set_leading(14.0)
784            .set_text_rise(0.5)
785            .set_rendering_mode(TextRenderingMode::FillStroke);
786
787        let ops = context.generate_text_state_operations();
788        assert!(ops.contains("1.50 Tc"));
789        assert!(ops.contains("2.00 Tw"));
790        assert!(ops.contains("110.00 Tz"));
791        assert!(ops.contains("14.00 TL"));
792        assert!(ops.contains("0.50 Ts"));
793        assert!(ops.contains("2 Tr"));
794    }
795
796    #[test]
797    fn test_all_text_state_operators_generated() {
798        let mut context = TextContext::new();
799
800        // Test all operators in sequence
801        context.set_character_spacing(1.0); // Tc
802        context.set_word_spacing(2.0); // Tw
803        context.set_horizontal_scaling(1.2); // Tz
804        context.set_leading(15.0); // TL
805        context.set_text_rise(1.0); // Ts
806        context.set_rendering_mode(TextRenderingMode::Stroke); // Tr
807
808        let ops = context.generate_text_state_operations();
809
810        // Verify all PDF text state operators are present
811        assert!(
812            ops.contains("Tc"),
813            "Character spacing operator (Tc) not found"
814        );
815        assert!(ops.contains("Tw"), "Word spacing operator (Tw) not found");
816        assert!(
817            ops.contains("Tz"),
818            "Horizontal scaling operator (Tz) not found"
819        );
820        assert!(ops.contains("TL"), "Leading operator (TL) not found");
821        assert!(ops.contains("Ts"), "Text rise operator (Ts) not found");
822        assert!(
823            ops.contains("Tr"),
824            "Text rendering mode operator (Tr) not found"
825        );
826    }
827
828    #[test]
829    fn test_text_color_operations() {
830        use crate::Color;
831
832        let mut context = TextContext::new();
833
834        // Test RGB fill color
835        context.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
836        context.apply_text_state_parameters();
837
838        let ops = context.operations();
839        assert!(
840            ops.contains("1.000 0.000 0.000 rg"),
841            "RGB fill color operator (rg) not found in: {ops}"
842        );
843
844        // Clear and test RGB stroke color
845        context.clear();
846        context.set_stroke_color(Color::rgb(0.0, 1.0, 0.0));
847        context.apply_text_state_parameters();
848
849        let ops = context.operations();
850        assert!(
851            ops.contains("0.000 1.000 0.000 RG"),
852            "RGB stroke color operator (RG) not found in: {ops}"
853        );
854
855        // Clear and test grayscale fill color
856        context.clear();
857        context.set_fill_color(Color::gray(0.5));
858        context.apply_text_state_parameters();
859
860        let ops = context.operations();
861        assert!(
862            ops.contains("0.500 g"),
863            "Gray fill color operator (g) not found in: {ops}"
864        );
865
866        // Clear and test CMYK stroke color
867        context.clear();
868        context.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1));
869        context.apply_text_state_parameters();
870
871        let ops = context.operations();
872        assert!(
873            ops.contains("0.200 0.300 0.400 0.100 K"),
874            "CMYK stroke color operator (K) not found in: {ops}"
875        );
876
877        // Test both fill and stroke colors together
878        context.clear();
879        context.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
880        context.set_stroke_color(Color::rgb(0.0, 0.0, 1.0));
881        context.apply_text_state_parameters();
882
883        let ops = context.operations();
884        assert!(
885            ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
886            "Both fill and stroke colors not found in: {ops}"
887        );
888    }
889
890    // Issue #97: Test used_characters tracking
891    #[test]
892    fn test_used_characters_tracking_ascii() {
893        let mut context = TextContext::new();
894        context.write("Hello").unwrap();
895
896        let chars = context.get_used_characters();
897        assert!(chars.is_some());
898        let chars = chars.unwrap();
899        assert!(chars.contains(&'H'));
900        assert!(chars.contains(&'e'));
901        assert!(chars.contains(&'l'));
902        assert!(chars.contains(&'o'));
903        assert_eq!(chars.len(), 4); // H, e, l, o (l appears twice but HashSet dedupes)
904    }
905
906    #[test]
907    fn test_used_characters_tracking_cjk() {
908        let mut context = TextContext::new();
909        context.set_font(Font::Custom("NotoSansCJK".to_string()), 12.0);
910        context.write("中文测试").unwrap();
911
912        let chars = context.get_used_characters();
913        assert!(chars.is_some());
914        let chars = chars.unwrap();
915        assert!(chars.contains(&'中'));
916        assert!(chars.contains(&'文'));
917        assert!(chars.contains(&'测'));
918        assert!(chars.contains(&'试'));
919        assert_eq!(chars.len(), 4);
920    }
921
922    #[test]
923    fn test_used_characters_empty_initially() {
924        let context = TextContext::new();
925        assert!(context.get_used_characters().is_none());
926    }
927
928    #[test]
929    fn test_used_characters_multiple_writes() {
930        let mut context = TextContext::new();
931        context.write("AB").unwrap();
932        context.write("CD").unwrap();
933
934        let chars = context.get_used_characters();
935        assert!(chars.is_some());
936        let chars = chars.unwrap();
937        assert!(chars.contains(&'A'));
938        assert!(chars.contains(&'B'));
939        assert!(chars.contains(&'C'));
940        assert!(chars.contains(&'D'));
941        assert_eq!(chars.len(), 4);
942    }
943
944    /// RED for Phase 2 of the v2.7.0 IR refactor: with the legacy `String`
945    /// emission, `set_character_spacing(f64::NAN)` propagates `NaN` into a
946    /// `Tc` operator, which is invalid per ISO 32000-1 §7.3.3. Once the
947    /// migration routes Tc through `serialize_ops`, `finite_or_zero`
948    /// clamps non-finite values to `0.0` and the assertion below passes.
949    #[test]
950    fn nan_char_spacing_sanitised_at_emission() {
951        let mut ctx = TextContext::new();
952        ctx.set_character_spacing(f64::NAN);
953        ctx.write("hi").unwrap();
954        let ops = ctx.operations();
955        assert!(
956            ops.contains("0.00 Tc\n"),
957            "NaN char spacing must emit `0.00 Tc`, got: {ops:?}"
958        );
959        assert!(
960            !ops.contains("NaN") && !ops.contains("inf"),
961            "non-finite tokens must not appear in any Tc/Tw/Tz/TL/Ts emission, got: {ops:?}"
962        );
963    }
964
965    #[test]
966    fn pos_inf_word_spacing_sanitised_at_emission() {
967        let mut ctx = TextContext::new();
968        ctx.set_word_spacing(f64::INFINITY);
969        ctx.write("hi").unwrap();
970        let ops = ctx.operations();
971        assert!(
972            ops.contains("0.00 Tw\n"),
973            "+inf word spacing must emit `0.00 Tw`, got: {ops:?}"
974        );
975        assert!(
976            !ops.contains("inf"),
977            "`inf` must not appear in Tw output, got: {ops:?}"
978        );
979    }
980
981    #[test]
982    fn nan_horizontal_scaling_sanitised_at_emission() {
983        let mut ctx = TextContext::new();
984        ctx.set_horizontal_scaling(f64::NAN);
985        ctx.write("hi").unwrap();
986        let ops = ctx.operations();
987        assert!(
988            ops.contains("0.00 Tz\n"),
989            "NaN horizontal scaling must emit `0.00 Tz`, got: {ops:?}"
990        );
991    }
992
993    #[test]
994    fn nan_leading_and_text_rise_sanitised_at_emission() {
995        let mut ctx = TextContext::new();
996        ctx.set_leading(f64::NEG_INFINITY);
997        ctx.set_text_rise(f64::NAN);
998        ctx.write("hi").unwrap();
999        let ops = ctx.operations();
1000        assert!(
1001            ops.contains("0.00 TL\n"),
1002            "-inf leading must emit `0.00 TL`, got: {ops:?}"
1003        );
1004        assert!(
1005            ops.contains("0.00 Ts\n"),
1006            "NaN text rise must emit `0.00 Ts`, got: {ops:?}"
1007        );
1008    }
1009
1010    #[test]
1011    fn test_text_context_threads_metrics_store() {
1012        use crate::text::metrics::{FontMetrics, FontMetricsStore};
1013        let store = FontMetricsStore::new();
1014        let ctx = TextContext::with_metrics_store(Some(store.clone()));
1015        // The store handle round-trips.
1016        assert!(ctx.font_metrics_store_for_test().is_some());
1017        // Cloning shares state.
1018        store.register("X", FontMetrics::new(400));
1019        assert_eq!(
1020            ctx.font_metrics_store_for_test().unwrap().len(),
1021            1,
1022            "TextContext must hold a clone that shares the underlying registry"
1023        );
1024    }
1025}