// oxidize_pdf/text/mod.rs

1pub mod cid_to_unicode;
2pub mod cmap;
3mod encoding;
4pub mod extraction;
5mod extraction_cmap;
6mod flow;
7mod font;
8pub mod font_manager;
9pub mod fonts;
10mod header_footer;
11pub mod invoice;
12mod layout;
13mod list;
14pub mod metrics;
15pub mod ocr;
16pub mod plaintext;
17pub mod structured;
18pub mod table;
19pub mod table_detection;
20pub mod text_block;
21pub mod validation;
22
23#[cfg(test)]
24mod cmap_tests;
25
26#[cfg(feature = "ocr-tesseract")]
27pub mod tesseract_provider;
28
29pub use encoding::{escape_pdf_string_literal, TextEncoding};
30pub use extraction::{
31    sanitize_extracted_text, ExtractedText, ExtractionOptions, TextExtractor, TextFragment,
32};
33pub use flow::{TextAlign, TextFlowContext};
34pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
35pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
36pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
37pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
38pub use list::{
39    BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
40    OrderedListStyle, UnorderedList,
41};
42pub use metrics::{
43    measure_char, measure_char_with, measure_text, measure_text_with, split_into_words,
44    FontMetricsStore,
45};
46pub use ocr::{
47    CharacterConfidence, CorrectionCandidate, CorrectionReason, CorrectionSuggestion,
48    CorrectionType, FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError,
49    OcrOptions, OcrPostProcessor, OcrProcessingResult, OcrProvider, OcrRegion, OcrResult,
50    OcrTextFragment, WordConfidence,
51};
52pub use plaintext::{LineBreakMode, PlainTextConfig, PlainTextExtractor, PlainTextResult};
53pub use table::{HeaderStyle, Table, TableCell, TableOptions};
54pub use text_block::{
55    compute_line_widths, measure_text_block, measure_text_block_with, TextBlockMetrics,
56};
57pub use validation::{MatchType, TextMatch, TextValidationResult, TextValidator};
58
59#[cfg(feature = "ocr-tesseract")]
60pub use tesseract_provider::{RustyTesseractConfig, RustyTesseractProvider};
61
62use crate::error::Result;
63use crate::Color;
64use std::collections::{HashMap, HashSet};
65
/// Text rendering mode for PDF text operations.
///
/// Re-exported via `oxidize_pdf::text::TextRenderingMode`.
///
/// Each variant's discriminant is the integer emitted as the operand of the
/// `Tr` operator (the value is obtained with `mode as u8`), so the explicit
/// numbers below must not be reordered or renumbered.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum TextRenderingMode {
    /// Fill text (default)
    Fill = 0,
    /// Stroke text
    Stroke = 1,
    /// Fill and stroke text
    FillStroke = 2,
    /// Invisible text (for searchable text over images)
    Invisible = 3,
    /// Fill text and add to path for clipping
    FillClip = 4,
    /// Stroke text and add to path for clipping
    StrokeClip = 5,
    /// Fill and stroke text and add to path for clipping
    FillStrokeClip = 6,
    /// Add text to path for clipping (invisible)
    Clip = 7,
}
88
89#[derive(Clone)]
90pub struct TextContext {
91    operations: Vec<crate::graphics::ops::Op>,
92    current_font: Font,
93    font_size: f64,
94    text_matrix: [f64; 6],
95    // Pending position for next write operation
96    pending_position: Option<(f64, f64)>,
97    // Text state parameters
98    character_spacing: Option<f64>,
99    word_spacing: Option<f64>,
100    horizontal_scaling: Option<f64>,
101    leading: Option<f64>,
102    text_rise: Option<f64>,
103    rendering_mode: Option<TextRenderingMode>,
104    // Color parameters
105    fill_color: Option<Color>,
106    stroke_color: Option<Color>,
107    // Track used characters per custom-font name (issue #204 — a single
108    // global set caused every registered font to be subsetted with the
109    // same characters, so two fonts of the same family ended up with
110    // duplicated subsets). Builtin fonts are not tracked because they
111    // don't need subsetting. Extended by `write` whenever the active
112    // font is `Font::Custom`.
113    used_characters_by_font: HashMap<String, HashSet<char>>,
114    /// Per-document font metrics store threaded from `Page` (issue #230).
115    /// `None` means the built-in heuristic width tables are used.
116    /// Non-test callers arrive in Task 9-11 (Document integration).
117    #[allow(dead_code)]
118    pub(crate) font_metrics_store: Option<FontMetricsStore>,
119}
120
121impl Default for TextContext {
122    fn default() -> Self {
123        Self::new()
124    }
125}
126
127impl TextContext {
128    pub fn new() -> Self {
129        Self {
130            operations: Vec::new(),
131            current_font: Font::Helvetica,
132            font_size: 12.0,
133            text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
134            pending_position: None,
135            character_spacing: None,
136            word_spacing: None,
137            horizontal_scaling: None,
138            leading: None,
139            text_rise: None,
140            rendering_mode: None,
141            fill_color: None,
142            stroke_color: None,
143            used_characters_by_font: HashMap::new(),
144            font_metrics_store: None,
145        }
146    }
147
148    /// Create a `TextContext` bound to a per-document `FontMetricsStore`
149    /// (issue #230). `None` is equivalent to `TextContext::new()`.
150    ///
151    /// `pub(crate)` — wired by `Page::*_with_metrics()` constructors (Task 8).
152    /// Non-test callers for `Page::*_with_metrics` arrive in Tasks 9-11.
153    #[allow(dead_code)]
154    pub(crate) fn with_metrics_store(store: Option<FontMetricsStore>) -> Self {
155        let mut ctx = Self::default();
156        ctx.font_metrics_store = store;
157        ctx
158    }
159
160    /// Record `text` as drawn with the currently-active font, bucketed
161    /// under the font's PDF name (issue #204). Builtin and custom fonts
162    /// are both tracked; the writer later filters to the set of
163    /// registered custom fonts when subsetting.
164    fn record_used_chars(&mut self, text: &str) {
165        let name = match &self.current_font {
166            Font::Custom(name) => name.clone(),
167            builtin => builtin.pdf_name(),
168        };
169        self.used_characters_by_font
170            .entry(name)
171            .or_default()
172            .extend(text.chars());
173    }
174
175    /// Introspection helper for Task 7 tests (issue #230).
176    #[cfg(test)]
177    pub(crate) fn font_metrics_store_for_test(&self) -> Option<&FontMetricsStore> {
178        self.font_metrics_store.as_ref()
179    }
180
181    /// Get the characters used in this text context (merged across all
182    /// fonts). Test-only compatibility accessor; callers that need
183    /// per-font accuracy for subsetting should use
184    /// [`TextContext::get_used_characters_by_font`] (issue #204).
185    #[cfg(test)]
186    pub(crate) fn get_used_characters(&self) -> Option<HashSet<char>> {
187        let merged: HashSet<char> = self
188            .used_characters_by_font
189            .values()
190            .flat_map(|s| s.iter().copied())
191            .collect();
192        if merged.is_empty() {
193            None
194        } else {
195            Some(merged)
196        }
197    }
198
199    /// Get the per-font character map for font subsetting (issue #204).
200    pub(crate) fn get_used_characters_by_font(&self) -> &HashMap<String, HashSet<char>> {
201        &self.used_characters_by_font
202    }
203
204    pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
205        self.current_font = font;
206        self.font_size = size;
207        self
208    }
209
210    /// Get the current font
211    #[allow(dead_code)]
212    pub(crate) fn current_font(&self) -> &Font {
213        &self.current_font
214    }
215
216    /// Current non-stroking (fill) colour, if one has been explicitly set.
217    /// Used by `Page::text_flow` to propagate the page-level text colour
218    /// into derived `TextFlowContext`s (issue #216).
219    pub(crate) fn fill_color(&self) -> Option<Color> {
220        self.fill_color
221    }
222
223    /// Accessors for the remaining text-state parameters (issue #222 —
224    /// Phase 6 of the v2.7.0 IR refactor). Used by `Page::text_flow` to
225    /// propagate the configured page-level state into derived
226    /// `TextFlowContext`s. Mirror of `fill_color()` above.
227    pub(crate) fn character_spacing(&self) -> Option<f64> {
228        self.character_spacing
229    }
230    pub(crate) fn word_spacing(&self) -> Option<f64> {
231        self.word_spacing
232    }
233    pub(crate) fn horizontal_scaling(&self) -> Option<f64> {
234        self.horizontal_scaling
235    }
236    pub(crate) fn leading(&self) -> Option<f64> {
237        self.leading
238    }
239    pub(crate) fn text_rise(&self) -> Option<f64> {
240        self.text_rise
241    }
242    pub(crate) fn rendering_mode(&self) -> Option<TextRenderingMode> {
243        self.rendering_mode
244    }
245    pub(crate) fn stroke_color(&self) -> Option<Color> {
246        self.stroke_color
247    }
248
249    pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
250        // Update text_matrix immediately and store for write() operation
251        self.text_matrix[4] = x;
252        self.text_matrix[5] = y;
253        self.pending_position = Some((x, y));
254        self
255    }
256
257    pub fn write(&mut self, text: &str) -> Result<&mut Self> {
258        use crate::graphics::ops::Op;
259
260        self.operations.push(Op::BeginText);
261
262        // Set font
263        self.operations.push(Op::SetFont {
264            name: self.current_font.pdf_name(),
265            size: self.font_size,
266        });
267
268        // Apply text state parameters (Tc/Tw/Tz/TL/Ts/Tr + colour)
269        self.apply_text_state_parameters();
270
271        // Set text position using pending_position if available, otherwise use text_matrix
272        let (x, y) = if let Some((px, py)) = self.pending_position.take() {
273            (px, py)
274        } else {
275            (self.text_matrix[4], self.text_matrix[5])
276        };
277        self.operations.push(Op::SetTextPosition { x, y });
278
279        // Choose encoding based on font type
280        match &self.current_font {
281            Font::Custom(_) => {
282                // For custom fonts (CJK), use UTF-16BE encoding with hex strings
283                let utf16_units: Vec<u16> = text.encode_utf16().collect();
284                let mut hex = String::new();
285                for unit in utf16_units {
286                    use std::fmt::Write as _;
287                    write!(
288                        &mut hex,
289                        "{:02X}{:02X}",
290                        (unit >> 8) as u8,
291                        (unit & 0xFF) as u8
292                    )
293                    .expect("write to String never fails");
294                }
295                self.operations.push(Op::ShowTextHex(hex.into_bytes()));
296            }
297            _ => {
298                // For standard fonts, use WinAnsiEncoding with literal-string escaping.
299                let encoding = TextEncoding::WinAnsiEncoding;
300                let encoded_bytes = encoding.encode(text);
301
302                let mut buf = Vec::with_capacity(encoded_bytes.len());
303                for &byte in &encoded_bytes {
304                    match byte {
305                        b'(' => buf.extend_from_slice(b"\\("),
306                        b')' => buf.extend_from_slice(b"\\)"),
307                        b'\\' => buf.extend_from_slice(b"\\\\"),
308                        b'\n' => buf.extend_from_slice(b"\\n"),
309                        b'\r' => buf.extend_from_slice(b"\\r"),
310                        b'\t' => buf.extend_from_slice(b"\\t"),
311                        0x20..=0x7E => buf.push(byte),
312                        _ => {
313                            use std::io::Write as _;
314                            write!(&mut buf, "\\{byte:03o}").expect("write to Vec<u8> never fails");
315                        }
316                    }
317                }
318                self.operations.push(Op::ShowText(buf));
319            }
320        }
321
322        // Track used characters for font subsetting bucketed by the
323        // active custom font (issue #204).
324        self.record_used_chars(text);
325
326        self.operations.push(Op::EndText);
327
328        Ok(self)
329    }
330
331    pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
332        self.write(text)?;
333        self.text_matrix[5] -= self.font_size * 1.2; // Move down for next line
334        Ok(self)
335    }
336
337    pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
338        self.character_spacing = Some(spacing);
339        self
340    }
341
342    pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
343        self.word_spacing = Some(spacing);
344        self
345    }
346
347    pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
348        self.horizontal_scaling = Some(scale);
349        self
350    }
351
352    pub fn set_leading(&mut self, leading: f64) -> &mut Self {
353        self.leading = Some(leading);
354        self
355    }
356
357    pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
358        self.text_rise = Some(rise);
359        self
360    }
361
362    /// Set the text rendering mode
363    pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
364        self.rendering_mode = Some(mode);
365        self
366    }
367
368    /// Set the text fill color
369    pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
370        self.fill_color = Some(color);
371        self
372    }
373
374    /// Set the text stroke color
375    pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
376        self.stroke_color = Some(color);
377        self
378    }
379
380    /// Apply text state parameters as `Op` values pushed into `self.operations`.
381    ///
382    /// All non-finite floats are clamped to `0.0` at serialisation time by
383    /// `serialize_ops` (issues #220 + #221 extend to non-colour emitters in
384    /// the v2.7.0 IR refactor).
385    fn apply_text_state_parameters(&mut self) {
386        use crate::graphics::ops::Op;
387
388        if let Some(spacing) = self.character_spacing {
389            self.operations.push(Op::SetCharSpacing(spacing));
390        }
391        if let Some(spacing) = self.word_spacing {
392            self.operations.push(Op::SetWordSpacing(spacing));
393        }
394        if let Some(scale) = self.horizontal_scaling {
395            // Tz operator takes a percentage. The setter accepts a 0.0–1.0
396            // ratio and the original implementation multiplied by 100 at
397            // emission; preserve that contract.
398            self.operations
399                .push(Op::SetHorizontalScaling(scale * 100.0));
400        }
401        if let Some(leading) = self.leading {
402            self.operations.push(Op::SetLeading(leading));
403        }
404        if let Some(rise) = self.text_rise {
405            self.operations.push(Op::SetTextRise(rise));
406        }
407        if let Some(mode) = self.rendering_mode {
408            self.operations.push(Op::SetRenderingMode(mode as u8));
409        }
410
411        // Fill / stroke colour delegates to the IR variants which in turn
412        // delegate to `write_fill_color_bytes` / `write_stroke_color_bytes`
413        // (issues #220 + #221).
414        if let Some(color) = self.fill_color {
415            self.operations.push(Op::SetFillColor(color));
416        }
417        if let Some(color) = self.stroke_color {
418            self.operations.push(Op::SetStrokeColor(color));
419        }
420    }
421
422    pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
423        let mut buf = Vec::new();
424        crate::graphics::ops::serialize_ops(&mut buf, &self.operations);
425        Ok(buf)
426    }
427
428    /// Take ownership of the accumulated `Op` buffer, leaving an empty
429    /// `Vec` in its place. Mirror of `GraphicsContext::drain_ops` —
430    /// used by `Page` to flush the text buffer into a unified content
431    /// stream on context switch (issue #227).
432    pub(crate) fn drain_ops(&mut self) -> Vec<crate::graphics::ops::Op> {
433        std::mem::take(&mut self.operations)
434    }
435
436    /// Read-only access to the operation list.
437    pub(crate) fn ops_slice(&self) -> &[crate::graphics::ops::Op] {
438        &self.operations
439    }
440
441    /// Appends a raw PDF operation to the text context
442    ///
443    /// This is used internally for marked content operators (BDC/EMC) and other
444    /// low-level PDF operations that need to be interleaved with text operations.
445    pub(crate) fn append_raw_operation(&mut self, operation: &str) {
446        self.operations
447            .push(crate::graphics::ops::Op::Raw(operation.as_bytes().to_vec()));
448    }
449
450    /// Get the current font size
451    pub fn font_size(&self) -> f64 {
452        self.font_size
453    }
454
455    /// Get the current text matrix
456    pub fn text_matrix(&self) -> [f64; 6] {
457        self.text_matrix
458    }
459
460    /// Get the current position
461    pub fn position(&self) -> (f64, f64) {
462        (self.text_matrix[4], self.text_matrix[5])
463    }
464
465    /// Clear all operations and reset text state parameters
466    pub fn clear(&mut self) {
467        self.operations.clear();
468        self.character_spacing = None;
469        self.word_spacing = None;
470        self.horizontal_scaling = None;
471        self.leading = None;
472        self.text_rise = None;
473        self.rendering_mode = None;
474        self.fill_color = None;
475        self.stroke_color = None;
476    }
477
478    /// Get the operations as a serialised PDF content-stream `String`.
479    ///
480    /// Pre-2.7.0 this returned `&str`. The IR migration replaced the
481    /// internal `String` buffer with a typed `Vec<Op>`, so the legacy
482    /// borrow is materialised on demand. Internal callers prefer
483    /// `generate_operations()` which returns the byte buffer directly.
484    pub fn operations(&self) -> String {
485        crate::graphics::ops::ops_to_string(&self.operations)
486    }
487
488    /// Generate text state operations for testing purposes.
489    /// Routes through the IR so the same sanitisation applies.
490    #[cfg(test)]
491    pub fn generate_text_state_operations(&self) -> String {
492        use crate::graphics::ops::{ops_to_string, Op};
493
494        let mut ops = Vec::new();
495        if let Some(spacing) = self.character_spacing {
496            ops.push(Op::SetCharSpacing(spacing));
497        }
498        if let Some(spacing) = self.word_spacing {
499            ops.push(Op::SetWordSpacing(spacing));
500        }
501        if let Some(scale) = self.horizontal_scaling {
502            ops.push(Op::SetHorizontalScaling(scale * 100.0));
503        }
504        if let Some(leading) = self.leading {
505            ops.push(Op::SetLeading(leading));
506        }
507        if let Some(rise) = self.text_rise {
508            ops.push(Op::SetTextRise(rise));
509        }
510        if let Some(mode) = self.rendering_mode {
511            ops.push(Op::SetRenderingMode(mode as u8));
512        }
513        ops_to_string(&ops)
514    }
515}
516
517#[cfg(test)]
518mod tests {
519    use super::*;
520
#[test]
fn test_text_context_new() {
    // A fresh context: Helvetica 12, identity matrix, nothing queued.
    let ctx = TextContext::new();
    let identity = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0];
    assert!(ctx.operations.is_empty());
    assert_eq!(ctx.text_matrix, identity);
    assert_eq!(ctx.font_size, 12.0);
    assert_eq!(ctx.current_font, Font::Helvetica);
}
529
#[test]
fn test_text_context_default() {
    // `Default` must hand back the same starting font and size as `new`.
    let ctx: TextContext = Default::default();
    assert_eq!(ctx.font_size, 12.0);
    assert_eq!(ctx.current_font, Font::Helvetica);
}
536
#[test]
fn test_set_font() {
    // `set_font` replaces both the font and its size.
    let mut ctx = TextContext::new();
    ctx.set_font(Font::TimesBold, 14.0);
    assert_eq!(ctx.font_size, 14.0);
    assert_eq!(ctx.current_font, Font::TimesBold);
}
544
#[test]
fn test_position() {
    // `at` is visible both through `position()` and in the raw matrix.
    let mut ctx = TextContext::new();
    ctx.at(100.0, 200.0);
    assert_eq!(ctx.text_matrix[4], 100.0);
    assert_eq!(ctx.text_matrix[5], 200.0);
    let pos = ctx.position();
    assert_eq!(pos.0, 100.0);
    assert_eq!(pos.1, 200.0);
}
555
#[test]
fn test_write_simple_text() {
    // A plain write yields a BT/ET group with the font and a Tj string.
    let mut ctx = TextContext::new();
    ctx.write("Hello").unwrap();

    let stream = ctx.operations();
    for fragment in ["BT\n", "ET\n", "/Helvetica 12 Tf", "(Hello) Tj"] {
        assert!(stream.contains(fragment));
    }
}
567
#[test]
fn test_write_text_with_escaping() {
    // Parentheses inside the text must be backslash-escaped.
    let mut ctx = TextContext::new();
    ctx.write("(Hello)").unwrap();

    let stream = ctx.operations();
    assert!(stream.contains("(\\(Hello\\)) Tj"));
}
576
#[test]
fn test_write_line() {
    let mut ctx = TextContext::new();
    let y_before = ctx.text_matrix[5];
    ctx.write_line("Line 1").unwrap();
    let y_after = ctx.text_matrix[5];

    // One line advance at the default size is font_size * 1.2, downwards.
    assert!(y_after < y_before);
    assert_eq!(y_after, y_before - 12.0 * 1.2);
}
588
#[test]
fn test_character_spacing() {
    // Character spacing is emitted as a `Tc` operator.
    let mut ctx = TextContext::new();
    ctx.set_character_spacing(2.5);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("2.50 Tc"));
}
597
#[test]
fn test_word_spacing() {
    // Word spacing is emitted as a `Tw` operator.
    let mut ctx = TextContext::new();
    ctx.set_word_spacing(1.5);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("1.50 Tw"));
}
606
#[test]
fn test_horizontal_scaling() {
    // The ratio 1.25 is converted to the percentage 125 for `Tz`.
    let mut ctx = TextContext::new();
    ctx.set_horizontal_scaling(1.25);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("125.00 Tz"));
}
615
#[test]
fn test_leading() {
    // Leading is emitted as a `TL` operator.
    let mut ctx = TextContext::new();
    ctx.set_leading(15.0);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("15.00 TL"));
}
624
#[test]
fn test_text_rise() {
    // Text rise is emitted as a `Ts` operator.
    let mut ctx = TextContext::new();
    ctx.set_text_rise(3.0);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("3.00 Ts"));
}
633
#[test]
fn test_clear() {
    let mut ctx = TextContext::new();

    // Writing queues output...
    ctx.write("Hello").unwrap();
    assert!(!ctx.operations().is_empty());

    // ...and `clear` discards all of it.
    ctx.clear();
    assert!(ctx.operations().is_empty());
}
643
#[test]
fn test_generate_operations() {
    // The byte form and the string form must serialise identically.
    let mut ctx = TextContext::new();
    ctx.write("Test").unwrap();

    let as_text = String::from_utf8(ctx.generate_operations().unwrap()).unwrap();
    assert_eq!(as_text, ctx.operations());
}
653
#[test]
fn test_method_chaining() {
    // Every setter returns `&mut Self`, so calls can be chained.
    let mut ctx = TextContext::new();
    ctx.set_font(Font::Courier, 10.0)
        .at(50.0, 100.0)
        .set_character_spacing(1.0)
        .set_word_spacing(2.0);

    assert_eq!(ctx.current_font(), &Font::Courier);
    assert_eq!(ctx.font_size(), 10.0);
    let pos = ctx.position();
    assert_eq!(pos.0, 50.0);
    assert_eq!(pos.1, 100.0);
}
669
#[test]
fn test_text_matrix_access() {
    // `at` only touches the translation slots of the matrix.
    let mut ctx = TextContext::new();
    ctx.at(25.0, 75.0);

    let expected = [1.0, 0.0, 0.0, 1.0, 25.0, 75.0];
    assert_eq!(ctx.text_matrix(), expected);
}
678
#[test]
fn test_special_characters_encoding() {
    // Newline and tab are written as their mnemonic escapes.
    let mut ctx = TextContext::new();
    ctx.write("Test\nLine\tTab").unwrap();

    let stream = ctx.operations();
    for escape in ["\\n", "\\t"] {
        assert!(stream.contains(escape));
    }
}
688
#[test]
fn test_rendering_mode_fill() {
    // `Fill` maps to Tr operand 0.
    let mut ctx = TextContext::new();
    ctx.set_rendering_mode(TextRenderingMode::Fill);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("0 Tr"));
}
697
#[test]
fn test_rendering_mode_stroke() {
    // `Stroke` maps to Tr operand 1.
    let mut ctx = TextContext::new();
    ctx.set_rendering_mode(TextRenderingMode::Stroke);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("1 Tr"));
}
706
#[test]
fn test_rendering_mode_fill_stroke() {
    // `FillStroke` maps to Tr operand 2.
    let mut ctx = TextContext::new();
    ctx.set_rendering_mode(TextRenderingMode::FillStroke);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("2 Tr"));
}
715
#[test]
fn test_rendering_mode_invisible() {
    // `Invisible` maps to Tr operand 3.
    let mut ctx = TextContext::new();
    ctx.set_rendering_mode(TextRenderingMode::Invisible);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("3 Tr"));
}
724
#[test]
fn test_rendering_mode_fill_clip() {
    // `FillClip` maps to Tr operand 4.
    let mut ctx = TextContext::new();
    ctx.set_rendering_mode(TextRenderingMode::FillClip);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("4 Tr"));
}
733
#[test]
fn test_rendering_mode_stroke_clip() {
    // `StrokeClip` maps to Tr operand 5.
    let mut ctx = TextContext::new();
    ctx.set_rendering_mode(TextRenderingMode::StrokeClip);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("5 Tr"));
}
742
#[test]
fn test_rendering_mode_fill_stroke_clip() {
    // `FillStrokeClip` maps to Tr operand 6.
    let mut ctx = TextContext::new();
    ctx.set_rendering_mode(TextRenderingMode::FillStrokeClip);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("6 Tr"));
}
751
#[test]
fn test_rendering_mode_clip() {
    // `Clip` maps to Tr operand 7.
    let mut ctx = TextContext::new();
    ctx.set_rendering_mode(TextRenderingMode::Clip);

    let emitted = ctx.generate_text_state_operations();
    assert!(emitted.contains("7 Tr"));
}
760
#[test]
fn test_text_state_parameters_chaining() {
    // Set every text-state parameter in one chain...
    let mut ctx = TextContext::new();
    ctx.set_character_spacing(1.5)
        .set_word_spacing(2.0)
        .set_horizontal_scaling(1.1)
        .set_leading(14.0)
        .set_text_rise(0.5)
        .set_rendering_mode(TextRenderingMode::FillStroke);

    // ...and expect each one to surface with its formatted operand.
    let emitted = ctx.generate_text_state_operations();
    let expected = [
        "1.50 Tc",
        "2.00 Tw",
        "110.00 Tz",
        "14.00 TL",
        "0.50 Ts",
        "2 Tr",
    ];
    for fragment in expected {
        assert!(emitted.contains(fragment));
    }
}
780
#[test]
fn test_all_text_state_operators_generated() {
    let mut ctx = TextContext::new();

    // Configure one value for each text-state operator.
    ctx.set_character_spacing(1.0); // Tc
    ctx.set_word_spacing(2.0); // Tw
    ctx.set_horizontal_scaling(1.2); // Tz
    ctx.set_leading(15.0); // TL
    ctx.set_text_rise(1.0); // Ts
    ctx.set_rendering_mode(TextRenderingMode::Stroke); // Tr

    let emitted = ctx.generate_text_state_operations();

    // Operator token paired with the failure message to show if it is absent.
    let checks = [
        ("Tc", "Character spacing operator (Tc) not found"),
        ("Tw", "Word spacing operator (Tw) not found"),
        ("Tz", "Horizontal scaling operator (Tz) not found"),
        ("TL", "Leading operator (TL) not found"),
        ("Ts", "Text rise operator (Ts) not found"),
        ("Tr", "Text rendering mode operator (Tr) not found"),
    ];
    for (operator, message) in checks {
        assert!(emitted.contains(operator), "{message}");
    }
}
812
#[test]
fn test_text_color_operations() {
    use crate::Color;

    // Push the currently configured state into the op list and serialise it.
    let render = |ctx: &mut TextContext| -> String {
        ctx.apply_text_state_parameters();
        ctx.operations()
    };

    let mut ctx = TextContext::new();

    // RGB fill colour -> `rg`
    ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
    let ops = render(&mut ctx);
    assert!(
        ops.contains("1.000 0.000 0.000 rg"),
        "RGB fill color operator (rg) not found in: {ops}"
    );

    // RGB stroke colour -> `RG`
    ctx.clear();
    ctx.set_stroke_color(Color::rgb(0.0, 1.0, 0.0));
    let ops = render(&mut ctx);
    assert!(
        ops.contains("0.000 1.000 0.000 RG"),
        "RGB stroke color operator (RG) not found in: {ops}"
    );

    // Grayscale fill colour -> `g`
    ctx.clear();
    ctx.set_fill_color(Color::gray(0.5));
    let ops = render(&mut ctx);
    assert!(
        ops.contains("0.500 g"),
        "Gray fill color operator (g) not found in: {ops}"
    );

    // CMYK stroke colour -> `K`
    ctx.clear();
    ctx.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1));
    let ops = render(&mut ctx);
    assert!(
        ops.contains("0.200 0.300 0.400 0.100 K"),
        "CMYK stroke color operator (K) not found in: {ops}"
    );

    // Fill and stroke configured together
    ctx.clear();
    ctx.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
    ctx.set_stroke_color(Color::rgb(0.0, 0.0, 1.0));
    let ops = render(&mut ctx);
    assert!(
        ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
        "Both fill and stroke colors not found in: {ops}"
    );
}
874
// Issue #97: Test used_characters tracking
#[test]
fn test_used_characters_tracking_ascii() {
    let mut ctx = TextContext::new();
    ctx.write("Hello").unwrap();

    let tracked = ctx.get_used_characters();
    assert!(tracked.is_some());
    let tracked = tracked.unwrap();
    // 'l' occurs twice in the input but a set stores it once: H, e, l, o.
    assert_eq!(tracked.len(), 4);
    for ch in "Helo".chars() {
        assert!(tracked.contains(&ch));
    }
}
890
891    #[test]
892    fn test_used_characters_tracking_cjk() {
893        let mut context = TextContext::new();
894        context.set_font(Font::Custom("NotoSansCJK".to_string()), 12.0);
895        context.write("中文测试").unwrap();
896
897        let chars = context.get_used_characters();
898        assert!(chars.is_some());
899        let chars = chars.unwrap();
900        assert!(chars.contains(&'中'));
901        assert!(chars.contains(&'文'));
902        assert!(chars.contains(&'测'));
903        assert!(chars.contains(&'试'));
904        assert_eq!(chars.len(), 4);
905    }
906
907    #[test]
908    fn test_used_characters_empty_initially() {
909        let context = TextContext::new();
910        assert!(context.get_used_characters().is_none());
911    }
912
913    #[test]
914    fn test_used_characters_multiple_writes() {
915        let mut context = TextContext::new();
916        context.write("AB").unwrap();
917        context.write("CD").unwrap();
918
919        let chars = context.get_used_characters();
920        assert!(chars.is_some());
921        let chars = chars.unwrap();
922        assert!(chars.contains(&'A'));
923        assert!(chars.contains(&'B'));
924        assert!(chars.contains(&'C'));
925        assert!(chars.contains(&'D'));
926        assert_eq!(chars.len(), 4);
927    }
928
929    /// RED for Phase 2 of the v2.7.0 IR refactor: with the legacy `String`
930    /// emission, `set_character_spacing(f64::NAN)` propagates `NaN` into a
931    /// `Tc` operator, which is invalid per ISO 32000-1 §7.3.3. Once the
932    /// migration routes Tc through `serialize_ops`, `finite_or_zero`
933    /// clamps non-finite values to `0.0` and the assertion below passes.
934    #[test]
935    fn nan_char_spacing_sanitised_at_emission() {
936        let mut ctx = TextContext::new();
937        ctx.set_character_spacing(f64::NAN);
938        ctx.write("hi").unwrap();
939        let ops = ctx.operations();
940        assert!(
941            ops.contains("0.00 Tc\n"),
942            "NaN char spacing must emit `0.00 Tc`, got: {ops:?}"
943        );
944        assert!(
945            !ops.contains("NaN") && !ops.contains("inf"),
946            "non-finite tokens must not appear in any Tc/Tw/Tz/TL/Ts emission, got: {ops:?}"
947        );
948    }
949
950    #[test]
951    fn pos_inf_word_spacing_sanitised_at_emission() {
952        let mut ctx = TextContext::new();
953        ctx.set_word_spacing(f64::INFINITY);
954        ctx.write("hi").unwrap();
955        let ops = ctx.operations();
956        assert!(
957            ops.contains("0.00 Tw\n"),
958            "+inf word spacing must emit `0.00 Tw`, got: {ops:?}"
959        );
960        assert!(
961            !ops.contains("inf"),
962            "`inf` must not appear in Tw output, got: {ops:?}"
963        );
964    }
965
966    #[test]
967    fn nan_horizontal_scaling_sanitised_at_emission() {
968        let mut ctx = TextContext::new();
969        ctx.set_horizontal_scaling(f64::NAN);
970        ctx.write("hi").unwrap();
971        let ops = ctx.operations();
972        assert!(
973            ops.contains("0.00 Tz\n"),
974            "NaN horizontal scaling must emit `0.00 Tz`, got: {ops:?}"
975        );
976    }
977
978    #[test]
979    fn nan_leading_and_text_rise_sanitised_at_emission() {
980        let mut ctx = TextContext::new();
981        ctx.set_leading(f64::NEG_INFINITY);
982        ctx.set_text_rise(f64::NAN);
983        ctx.write("hi").unwrap();
984        let ops = ctx.operations();
985        assert!(
986            ops.contains("0.00 TL\n"),
987            "-inf leading must emit `0.00 TL`, got: {ops:?}"
988        );
989        assert!(
990            ops.contains("0.00 Ts\n"),
991            "NaN text rise must emit `0.00 Ts`, got: {ops:?}"
992        );
993    }
994
995    #[test]
996    fn test_text_context_threads_metrics_store() {
997        use crate::text::metrics::{FontMetrics, FontMetricsStore};
998        let store = FontMetricsStore::new();
999        let ctx = TextContext::with_metrics_store(Some(store.clone()));
1000        // The store handle round-trips.
1001        assert!(ctx.font_metrics_store_for_test().is_some());
1002        // Cloning shares state.
1003        store.register("X", FontMetrics::new(400));
1004        assert_eq!(
1005            ctx.font_metrics_store_for_test().unwrap().len(),
1006            1,
1007            "TextContext must hold a clone that shares the underlying registry"
1008        );
1009    }
1010}