Skip to main content

oxidize_pdf/text/
mod.rs

1pub mod cid_to_unicode;
2pub mod cmap;
3mod encoding;
4pub mod extraction;
5mod extraction_cmap;
6mod flow;
7mod font;
8pub mod font_manager;
9pub mod fonts;
10mod header_footer;
11pub mod invoice;
12mod layout;
13mod list;
14pub mod metrics;
15pub mod ocr;
16pub mod plaintext;
17pub mod structured;
18pub mod table;
19pub mod table_detection;
20pub mod text_block;
21pub mod validation;
22
23#[cfg(test)]
24mod cmap_tests;
25
26#[cfg(feature = "ocr-tesseract")]
27pub mod tesseract_provider;
28
29pub use encoding::{escape_pdf_string_literal, TextEncoding};
30pub use extraction::{
31    sanitize_extracted_text, ExtractedText, ExtractionOptions, TextExtractor, TextFragment,
32};
33pub use flow::{TextAlign, TextFlowContext};
34pub use font::{Font, FontEncoding, FontFamily, FontWithEncoding};
35pub use font_manager::{CustomFont, FontDescriptor, FontFlags, FontManager, FontMetrics, FontType};
36pub use header_footer::{HeaderFooter, HeaderFooterOptions, HeaderFooterPosition};
37pub use layout::{ColumnContent, ColumnLayout, ColumnOptions, TextFormat};
38pub use list::{
39    BulletStyle, ListElement, ListItem, ListOptions, ListStyle as ListStyleEnum, OrderedList,
40    OrderedListStyle, UnorderedList,
41};
42pub use metrics::{
43    measure_char, measure_char_with, measure_text, measure_text_with, split_into_words,
44    FontMetricsStore,
45};
46pub use ocr::{
47    CharacterConfidence, CorrectionCandidate, CorrectionReason, CorrectionSuggestion,
48    CorrectionType, FragmentType, ImagePreprocessing, MockOcrProvider, OcrEngine, OcrError,
49    OcrOptions, OcrPostProcessor, OcrProcessingResult, OcrProvider, OcrRegion, OcrResult,
50    OcrTextFragment, WordConfidence,
51};
52pub use plaintext::{LineBreakMode, PlainTextConfig, PlainTextExtractor, PlainTextResult};
53pub use table::{HeaderStyle, Table, TableCell, TableOptions};
54pub use text_block::{
55    compute_line_widths, measure_text_block, measure_text_block_with, TextBlockMetrics,
56};
57pub use validation::{MatchType, TextMatch, TextValidationResult, TextValidator};
58
59#[cfg(feature = "ocr-tesseract")]
60pub use tesseract_provider::{RustyTesseractConfig, RustyTesseractProvider};
61
62use crate::error::Result;
63use crate::Color;
64use std::collections::{HashMap, HashSet};
65
/// How glyphs are painted by the PDF text-showing operators.
///
/// Each variant's discriminant is the integer operand that the text
/// context emits with the `Tr` operator (the mode is cast with `as u8`).
///
/// Re-exported via `oxidize_pdf::text::TextRenderingMode`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextRenderingMode {
    /// Fill the glyphs (default).
    Fill = 0,
    /// Stroke the glyph outlines.
    Stroke = 1,
    /// Fill the glyphs, then stroke them.
    FillStroke = 2,
    /// Paint nothing (for searchable text over images).
    Invisible = 3,
    /// Fill the glyphs and add them to the clipping path.
    FillClip = 4,
    /// Stroke the glyphs and add them to the clipping path.
    StrokeClip = 5,
    /// Fill and stroke the glyphs and add them to the clipping path.
    FillStrokeClip = 6,
    /// Only add the glyphs to the clipping path (nothing is painted).
    Clip = 7,
}
88
89/// Build the show-text IR op for `text` rendered with `font`. Single
90/// emission path shared by `TextContext::write` and
91/// `TextFlowContext::write_wrapped` so the two cannot diverge on encoding
92/// or escaping (issue #240 — pre-fix, the flow path emitted raw UTF-8
93/// bytes inside the literal `( … ) Tj` and any character outside ASCII
94/// rendered as Windows-1252 mojibake).
95///
96/// - `Font::Custom(_)` → UTF-16BE hex string per ISO 32000-1 §9.10.3,
97///   wrapped in `Op::ShowTextHex` so the writer emits `< … > Tj`.
98/// - Any builtin font → bytes are first WinAnsi-encoded
99///   ([`TextEncoding::WinAnsiEncoding`]) and then escaped for inclusion
100///   in a PDF string literal via
101///   [`encoding::escape_show_text_literal_bytes`].
102pub(crate) fn build_show_text_op(text: &str, font: &Font) -> crate::graphics::ops::Op {
103    use crate::graphics::ops::Op;
104
105    match font {
106        Font::Custom(_) => {
107            let utf16_units: Vec<u16> = text.encode_utf16().collect();
108            let mut hex = String::with_capacity(utf16_units.len() * 4);
109            for unit in utf16_units {
110                use std::fmt::Write as _;
111                write!(
112                    &mut hex,
113                    "{:02X}{:02X}",
114                    (unit >> 8) as u8,
115                    (unit & 0xFF) as u8
116                )
117                .expect("write to String never fails");
118            }
119            Op::ShowTextHex(hex.into_bytes())
120        }
121        _ => {
122            let encoded = TextEncoding::WinAnsiEncoding.encode(text);
123            Op::ShowText(encoding::escape_show_text_literal_bytes(&encoded))
124        }
125    }
126}
127
128#[derive(Clone)]
129pub struct TextContext {
130    operations: Vec<crate::graphics::ops::Op>,
131    current_font: Font,
132    font_size: f64,
133    text_matrix: [f64; 6],
134    // Pending position for next write operation
135    pending_position: Option<(f64, f64)>,
136    // Text state parameters
137    character_spacing: Option<f64>,
138    word_spacing: Option<f64>,
139    horizontal_scaling: Option<f64>,
140    leading: Option<f64>,
141    text_rise: Option<f64>,
142    rendering_mode: Option<TextRenderingMode>,
143    // Color parameters
144    fill_color: Option<Color>,
145    stroke_color: Option<Color>,
146    // Track used characters per custom-font name (issue #204 — a single
147    // global set caused every registered font to be subsetted with the
148    // same characters, so two fonts of the same family ended up with
149    // duplicated subsets). Builtin fonts are not tracked because they
150    // don't need subsetting. Extended by `write` whenever the active
151    // font is `Font::Custom`.
152    used_characters_by_font: HashMap<String, HashSet<char>>,
153    /// Per-document font metrics store threaded from `Page` (issue #230).
154    /// `None` means the built-in heuristic width tables are used.
155    /// Non-test callers arrive in Task 9-11 (Document integration).
156    #[allow(dead_code)]
157    pub(crate) font_metrics_store: Option<FontMetricsStore>,
158}
159
160impl Default for TextContext {
161    fn default() -> Self {
162        Self::new()
163    }
164}
165
166impl TextContext {
167    pub fn new() -> Self {
168        Self {
169            operations: Vec::new(),
170            current_font: Font::Helvetica,
171            font_size: 12.0,
172            text_matrix: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
173            pending_position: None,
174            character_spacing: None,
175            word_spacing: None,
176            horizontal_scaling: None,
177            leading: None,
178            text_rise: None,
179            rendering_mode: None,
180            fill_color: None,
181            stroke_color: None,
182            used_characters_by_font: HashMap::new(),
183            font_metrics_store: None,
184        }
185    }
186
187    /// Create a `TextContext` bound to a per-document `FontMetricsStore`
188    /// (issue #230). `None` is equivalent to `TextContext::new()`.
189    ///
190    /// `pub(crate)` — wired by `Page::*_with_metrics()` constructors and
191    /// by `Document::new_page_*()` factories.
192    pub(crate) fn with_metrics_store(store: Option<FontMetricsStore>) -> Self {
193        let mut ctx = Self::default();
194        ctx.font_metrics_store = store;
195        ctx
196    }
197
198    /// Inject or replace the per-Document `FontMetricsStore` on an
199    /// already-constructed context. Preserves accumulated ops and any
200    /// other state — only the `font_metrics_store` field is mutated.
201    ///
202    /// Called by `Document::add_page` for pages constructed via
203    /// `Page::a4()` / `Page::letter()` / `Page::new()` (those start with
204    /// `font_metrics_store: None` and may already carry ops the caller
205    /// pushed before transferring ownership to the Document).
206    pub(crate) fn set_metrics_store(&mut self, store: Option<FontMetricsStore>) {
207        self.font_metrics_store = store;
208    }
209
210    /// Record `text` as drawn with the currently-active font, bucketed
211    /// under the font's PDF name (issue #204). Builtin and custom fonts
212    /// are both tracked; the writer later filters to the set of
213    /// registered custom fonts when subsetting.
214    fn record_used_chars(&mut self, text: &str) {
215        let name = match &self.current_font {
216            Font::Custom(name) => name.clone(),
217            builtin => builtin.pdf_name(),
218        };
219        self.used_characters_by_font
220            .entry(name)
221            .or_default()
222            .extend(text.chars());
223    }
224
225    /// Introspection helper for Task 7 tests (issue #230).
226    #[cfg(test)]
227    pub(crate) fn font_metrics_store_for_test(&self) -> Option<&FontMetricsStore> {
228        self.font_metrics_store.as_ref()
229    }
230
231    /// Get the characters used in this text context (merged across all
232    /// fonts). Test-only compatibility accessor; callers that need
233    /// per-font accuracy for subsetting should use
234    /// [`TextContext::get_used_characters_by_font`] (issue #204).
235    #[cfg(test)]
236    pub(crate) fn get_used_characters(&self) -> Option<HashSet<char>> {
237        let merged: HashSet<char> = self
238            .used_characters_by_font
239            .values()
240            .flat_map(|s| s.iter().copied())
241            .collect();
242        if merged.is_empty() {
243            None
244        } else {
245            Some(merged)
246        }
247    }
248
249    /// Get the per-font character map for font subsetting (issue #204).
250    pub(crate) fn get_used_characters_by_font(&self) -> &HashMap<String, HashSet<char>> {
251        &self.used_characters_by_font
252    }
253
254    pub fn set_font(&mut self, font: Font, size: f64) -> &mut Self {
255        self.current_font = font;
256        self.font_size = size;
257        self
258    }
259
260    /// Get the current font
261    #[allow(dead_code)]
262    pub(crate) fn current_font(&self) -> &Font {
263        &self.current_font
264    }
265
266    /// Current non-stroking (fill) colour, if one has been explicitly set.
267    /// Used by `Page::text_flow` to propagate the page-level text colour
268    /// into derived `TextFlowContext`s (issue #216).
269    pub(crate) fn fill_color(&self) -> Option<Color> {
270        self.fill_color
271    }
272
273    /// Accessors for the remaining text-state parameters (issue #222 —
274    /// Phase 6 of the v2.7.0 IR refactor). Used by `Page::text_flow` to
275    /// propagate the configured page-level state into derived
276    /// `TextFlowContext`s. Mirror of `fill_color()` above.
277    pub(crate) fn character_spacing(&self) -> Option<f64> {
278        self.character_spacing
279    }
280    pub(crate) fn word_spacing(&self) -> Option<f64> {
281        self.word_spacing
282    }
283    pub(crate) fn horizontal_scaling(&self) -> Option<f64> {
284        self.horizontal_scaling
285    }
286    pub(crate) fn leading(&self) -> Option<f64> {
287        self.leading
288    }
289    pub(crate) fn text_rise(&self) -> Option<f64> {
290        self.text_rise
291    }
292    pub(crate) fn rendering_mode(&self) -> Option<TextRenderingMode> {
293        self.rendering_mode
294    }
295    pub(crate) fn stroke_color(&self) -> Option<Color> {
296        self.stroke_color
297    }
298
299    pub fn at(&mut self, x: f64, y: f64) -> &mut Self {
300        // Update text_matrix immediately and store for write() operation
301        self.text_matrix[4] = x;
302        self.text_matrix[5] = y;
303        self.pending_position = Some((x, y));
304        self
305    }
306
307    pub fn write(&mut self, text: &str) -> Result<&mut Self> {
308        use crate::graphics::ops::Op;
309
310        self.operations.push(Op::BeginText);
311
312        // Set font
313        self.operations.push(Op::SetFont {
314            name: self.current_font.pdf_name(),
315            size: self.font_size,
316        });
317
318        // Apply text state parameters (Tc/Tw/Tz/TL/Ts/Tr + colour)
319        self.apply_text_state_parameters();
320
321        // Set text position using pending_position if available, otherwise use text_matrix
322        let (x, y) = if let Some((px, py)) = self.pending_position.take() {
323            (px, py)
324        } else {
325            (self.text_matrix[4], self.text_matrix[5])
326        };
327        self.operations.push(Op::SetTextPosition { x, y });
328
329        // Shared encoding + escape pipeline (issue #240): builtin fonts
330        // route through WinAnsi + literal-string escape; Custom (CJK)
331        // fonts route through UTF-16BE hex. Mirror of the same call in
332        // `TextFlowContext::write_wrapped` — single source of truth.
333        self.operations
334            .push(build_show_text_op(text, &self.current_font));
335
336        // Track used characters for font subsetting bucketed by the
337        // active custom font (issue #204).
338        self.record_used_chars(text);
339
340        self.operations.push(Op::EndText);
341
342        Ok(self)
343    }
344
345    pub fn write_line(&mut self, text: &str) -> Result<&mut Self> {
346        self.write(text)?;
347        self.text_matrix[5] -= self.font_size * 1.2; // Move down for next line
348        Ok(self)
349    }
350
351    pub fn set_character_spacing(&mut self, spacing: f64) -> &mut Self {
352        self.character_spacing = Some(spacing);
353        self
354    }
355
356    pub fn set_word_spacing(&mut self, spacing: f64) -> &mut Self {
357        self.word_spacing = Some(spacing);
358        self
359    }
360
361    pub fn set_horizontal_scaling(&mut self, scale: f64) -> &mut Self {
362        self.horizontal_scaling = Some(scale);
363        self
364    }
365
366    pub fn set_leading(&mut self, leading: f64) -> &mut Self {
367        self.leading = Some(leading);
368        self
369    }
370
371    pub fn set_text_rise(&mut self, rise: f64) -> &mut Self {
372        self.text_rise = Some(rise);
373        self
374    }
375
376    /// Set the text rendering mode
377    pub fn set_rendering_mode(&mut self, mode: TextRenderingMode) -> &mut Self {
378        self.rendering_mode = Some(mode);
379        self
380    }
381
382    /// Set the text fill color
383    pub fn set_fill_color(&mut self, color: Color) -> &mut Self {
384        self.fill_color = Some(color);
385        self
386    }
387
388    /// Set the text stroke color
389    pub fn set_stroke_color(&mut self, color: Color) -> &mut Self {
390        self.stroke_color = Some(color);
391        self
392    }
393
394    /// Apply text state parameters as `Op` values pushed into `self.operations`.
395    ///
396    /// All non-finite floats are clamped to `0.0` at serialisation time by
397    /// `serialize_ops` (issues #220 + #221 extend to non-colour emitters in
398    /// the v2.7.0 IR refactor).
399    fn apply_text_state_parameters(&mut self) {
400        use crate::graphics::ops::Op;
401
402        if let Some(spacing) = self.character_spacing {
403            self.operations.push(Op::SetCharSpacing(spacing));
404        }
405        if let Some(spacing) = self.word_spacing {
406            self.operations.push(Op::SetWordSpacing(spacing));
407        }
408        if let Some(scale) = self.horizontal_scaling {
409            // Tz operator takes a percentage. The setter accepts a 0.0–1.0
410            // ratio and the original implementation multiplied by 100 at
411            // emission; preserve that contract.
412            self.operations
413                .push(Op::SetHorizontalScaling(scale * 100.0));
414        }
415        if let Some(leading) = self.leading {
416            self.operations.push(Op::SetLeading(leading));
417        }
418        if let Some(rise) = self.text_rise {
419            self.operations.push(Op::SetTextRise(rise));
420        }
421        if let Some(mode) = self.rendering_mode {
422            self.operations.push(Op::SetRenderingMode(mode as u8));
423        }
424
425        // Fill / stroke colour delegates to the IR variants which in turn
426        // delegate to `write_fill_color_bytes` / `write_stroke_color_bytes`
427        // (issues #220 + #221).
428        if let Some(color) = self.fill_color {
429            self.operations.push(Op::SetFillColor(color));
430        }
431        if let Some(color) = self.stroke_color {
432            self.operations.push(Op::SetStrokeColor(color));
433        }
434    }
435
436    pub(crate) fn generate_operations(&self) -> Result<Vec<u8>> {
437        let mut buf = Vec::new();
438        crate::graphics::ops::serialize_ops(&mut buf, &self.operations);
439        Ok(buf)
440    }
441
442    /// Take ownership of the accumulated `Op` buffer, leaving an empty
443    /// `Vec` in its place. Mirror of `GraphicsContext::drain_ops` —
444    /// used by `Page` to flush the text buffer into a unified content
445    /// stream on context switch (issue #227).
446    pub(crate) fn drain_ops(&mut self) -> Vec<crate::graphics::ops::Op> {
447        std::mem::take(&mut self.operations)
448    }
449
450    /// Read-only access to the operation list.
451    pub(crate) fn ops_slice(&self) -> &[crate::graphics::ops::Op] {
452        &self.operations
453    }
454
455    /// Appends a raw PDF operation to the text context
456    ///
457    /// This is used internally for marked content operators (BDC/EMC) and other
458    /// low-level PDF operations that need to be interleaved with text operations.
459    pub(crate) fn append_raw_operation(&mut self, operation: &str) {
460        self.operations
461            .push(crate::graphics::ops::Op::Raw(operation.as_bytes().to_vec()));
462    }
463
464    /// Get the current font size
465    pub fn font_size(&self) -> f64 {
466        self.font_size
467    }
468
469    /// Get the current text matrix
470    pub fn text_matrix(&self) -> [f64; 6] {
471        self.text_matrix
472    }
473
474    /// Get the current position
475    pub fn position(&self) -> (f64, f64) {
476        (self.text_matrix[4], self.text_matrix[5])
477    }
478
479    /// Clear all operations and reset text state parameters
480    pub fn clear(&mut self) {
481        self.operations.clear();
482        self.character_spacing = None;
483        self.word_spacing = None;
484        self.horizontal_scaling = None;
485        self.leading = None;
486        self.text_rise = None;
487        self.rendering_mode = None;
488        self.fill_color = None;
489        self.stroke_color = None;
490    }
491
492    /// Get the operations as a serialised PDF content-stream `String`.
493    ///
494    /// Pre-2.7.0 this returned `&str`. The IR migration replaced the
495    /// internal `String` buffer with a typed `Vec<Op>`, so the legacy
496    /// borrow is materialised on demand. Internal callers prefer
497    /// `generate_operations()` which returns the byte buffer directly.
498    pub fn operations(&self) -> String {
499        crate::graphics::ops::ops_to_string(&self.operations)
500    }
501
502    /// Generate text state operations for testing purposes.
503    /// Routes through the IR so the same sanitisation applies.
504    #[cfg(test)]
505    pub fn generate_text_state_operations(&self) -> String {
506        use crate::graphics::ops::{ops_to_string, Op};
507
508        let mut ops = Vec::new();
509        if let Some(spacing) = self.character_spacing {
510            ops.push(Op::SetCharSpacing(spacing));
511        }
512        if let Some(spacing) = self.word_spacing {
513            ops.push(Op::SetWordSpacing(spacing));
514        }
515        if let Some(scale) = self.horizontal_scaling {
516            ops.push(Op::SetHorizontalScaling(scale * 100.0));
517        }
518        if let Some(leading) = self.leading {
519            ops.push(Op::SetLeading(leading));
520        }
521        if let Some(rise) = self.text_rise {
522            ops.push(Op::SetTextRise(rise));
523        }
524        if let Some(mode) = self.rendering_mode {
525            ops.push(Op::SetRenderingMode(mode as u8));
526        }
527        ops_to_string(&ops)
528    }
529}
530
531#[cfg(test)]
532mod tests {
533    use super::*;
534
535    #[test]
536    fn test_text_context_new() {
537        let context = TextContext::new();
538        assert_eq!(context.current_font, Font::Helvetica);
539        assert_eq!(context.font_size, 12.0);
540        assert_eq!(context.text_matrix, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
541        assert!(context.operations.is_empty());
542    }
543
544    #[test]
545    fn test_text_context_default() {
546        let context = TextContext::default();
547        assert_eq!(context.current_font, Font::Helvetica);
548        assert_eq!(context.font_size, 12.0);
549    }
550
551    #[test]
552    fn test_set_font() {
553        let mut context = TextContext::new();
554        context.set_font(Font::TimesBold, 14.0);
555        assert_eq!(context.current_font, Font::TimesBold);
556        assert_eq!(context.font_size, 14.0);
557    }
558
559    #[test]
560    fn test_position() {
561        let mut context = TextContext::new();
562        context.at(100.0, 200.0);
563        let (x, y) = context.position();
564        assert_eq!(x, 100.0);
565        assert_eq!(y, 200.0);
566        assert_eq!(context.text_matrix[4], 100.0);
567        assert_eq!(context.text_matrix[5], 200.0);
568    }
569
570    #[test]
571    fn test_write_simple_text() {
572        let mut context = TextContext::new();
573        context.write("Hello").unwrap();
574
575        let ops = context.operations();
576        assert!(ops.contains("BT\n"));
577        assert!(ops.contains("ET\n"));
578        assert!(ops.contains("/Helvetica 12 Tf"));
579        assert!(ops.contains("(Hello) Tj"));
580    }
581
582    #[test]
583    fn test_write_text_with_escaping() {
584        let mut context = TextContext::new();
585        context.write("(Hello)").unwrap();
586
587        let ops = context.operations();
588        assert!(ops.contains("(\\(Hello\\)) Tj"));
589    }
590
591    #[test]
592    fn test_write_line() {
593        let mut context = TextContext::new();
594        let initial_y = context.text_matrix[5];
595        context.write_line("Line 1").unwrap();
596
597        // Y position should have moved down
598        let new_y = context.text_matrix[5];
599        assert!(new_y < initial_y);
600        assert_eq!(new_y, initial_y - 12.0 * 1.2); // font_size * 1.2
601    }
602
603    #[test]
604    fn test_character_spacing() {
605        let mut context = TextContext::new();
606        context.set_character_spacing(2.5);
607
608        let ops = context.generate_text_state_operations();
609        assert!(ops.contains("2.50 Tc"));
610    }
611
612    #[test]
613    fn test_word_spacing() {
614        let mut context = TextContext::new();
615        context.set_word_spacing(1.5);
616
617        let ops = context.generate_text_state_operations();
618        assert!(ops.contains("1.50 Tw"));
619    }
620
621    #[test]
622    fn test_horizontal_scaling() {
623        let mut context = TextContext::new();
624        context.set_horizontal_scaling(1.25);
625
626        let ops = context.generate_text_state_operations();
627        assert!(ops.contains("125.00 Tz")); // 1.25 * 100
628    }
629
630    #[test]
631    fn test_leading() {
632        let mut context = TextContext::new();
633        context.set_leading(15.0);
634
635        let ops = context.generate_text_state_operations();
636        assert!(ops.contains("15.00 TL"));
637    }
638
639    #[test]
640    fn test_text_rise() {
641        let mut context = TextContext::new();
642        context.set_text_rise(3.0);
643
644        let ops = context.generate_text_state_operations();
645        assert!(ops.contains("3.00 Ts"));
646    }
647
648    #[test]
649    fn test_clear() {
650        let mut context = TextContext::new();
651        context.write("Hello").unwrap();
652        assert!(!context.operations().is_empty());
653
654        context.clear();
655        assert!(context.operations().is_empty());
656    }
657
658    #[test]
659    fn test_generate_operations() {
660        let mut context = TextContext::new();
661        context.write("Test").unwrap();
662
663        let ops_bytes = context.generate_operations().unwrap();
664        let ops_string = String::from_utf8(ops_bytes).unwrap();
665        assert_eq!(ops_string, context.operations());
666    }
667
668    #[test]
669    fn test_method_chaining() {
670        let mut context = TextContext::new();
671        context
672            .set_font(Font::Courier, 10.0)
673            .at(50.0, 100.0)
674            .set_character_spacing(1.0)
675            .set_word_spacing(2.0);
676
677        assert_eq!(context.current_font(), &Font::Courier);
678        assert_eq!(context.font_size(), 10.0);
679        let (x, y) = context.position();
680        assert_eq!(x, 50.0);
681        assert_eq!(y, 100.0);
682    }
683
684    #[test]
685    fn test_text_matrix_access() {
686        let mut context = TextContext::new();
687        context.at(25.0, 75.0);
688
689        let matrix = context.text_matrix();
690        assert_eq!(matrix, [1.0, 0.0, 0.0, 1.0, 25.0, 75.0]);
691    }
692
693    #[test]
694    fn test_special_characters_encoding() {
695        let mut context = TextContext::new();
696        context.write("Test\nLine\tTab").unwrap();
697
698        let ops = context.operations();
699        assert!(ops.contains("\\n"));
700        assert!(ops.contains("\\t"));
701    }
702
703    #[test]
704    fn test_rendering_mode_fill() {
705        let mut context = TextContext::new();
706        context.set_rendering_mode(TextRenderingMode::Fill);
707
708        let ops = context.generate_text_state_operations();
709        assert!(ops.contains("0 Tr"));
710    }
711
712    #[test]
713    fn test_rendering_mode_stroke() {
714        let mut context = TextContext::new();
715        context.set_rendering_mode(TextRenderingMode::Stroke);
716
717        let ops = context.generate_text_state_operations();
718        assert!(ops.contains("1 Tr"));
719    }
720
721    #[test]
722    fn test_rendering_mode_fill_stroke() {
723        let mut context = TextContext::new();
724        context.set_rendering_mode(TextRenderingMode::FillStroke);
725
726        let ops = context.generate_text_state_operations();
727        assert!(ops.contains("2 Tr"));
728    }
729
730    #[test]
731    fn test_rendering_mode_invisible() {
732        let mut context = TextContext::new();
733        context.set_rendering_mode(TextRenderingMode::Invisible);
734
735        let ops = context.generate_text_state_operations();
736        assert!(ops.contains("3 Tr"));
737    }
738
739    #[test]
740    fn test_rendering_mode_fill_clip() {
741        let mut context = TextContext::new();
742        context.set_rendering_mode(TextRenderingMode::FillClip);
743
744        let ops = context.generate_text_state_operations();
745        assert!(ops.contains("4 Tr"));
746    }
747
748    #[test]
749    fn test_rendering_mode_stroke_clip() {
750        let mut context = TextContext::new();
751        context.set_rendering_mode(TextRenderingMode::StrokeClip);
752
753        let ops = context.generate_text_state_operations();
754        assert!(ops.contains("5 Tr"));
755    }
756
757    #[test]
758    fn test_rendering_mode_fill_stroke_clip() {
759        let mut context = TextContext::new();
760        context.set_rendering_mode(TextRenderingMode::FillStrokeClip);
761
762        let ops = context.generate_text_state_operations();
763        assert!(ops.contains("6 Tr"));
764    }
765
766    #[test]
767    fn test_rendering_mode_clip() {
768        let mut context = TextContext::new();
769        context.set_rendering_mode(TextRenderingMode::Clip);
770
771        let ops = context.generate_text_state_operations();
772        assert!(ops.contains("7 Tr"));
773    }
774
775    #[test]
776    fn test_text_state_parameters_chaining() {
777        let mut context = TextContext::new();
778        context
779            .set_character_spacing(1.5)
780            .set_word_spacing(2.0)
781            .set_horizontal_scaling(1.1)
782            .set_leading(14.0)
783            .set_text_rise(0.5)
784            .set_rendering_mode(TextRenderingMode::FillStroke);
785
786        let ops = context.generate_text_state_operations();
787        assert!(ops.contains("1.50 Tc"));
788        assert!(ops.contains("2.00 Tw"));
789        assert!(ops.contains("110.00 Tz"));
790        assert!(ops.contains("14.00 TL"));
791        assert!(ops.contains("0.50 Ts"));
792        assert!(ops.contains("2 Tr"));
793    }
794
795    #[test]
796    fn test_all_text_state_operators_generated() {
797        let mut context = TextContext::new();
798
799        // Test all operators in sequence
800        context.set_character_spacing(1.0); // Tc
801        context.set_word_spacing(2.0); // Tw
802        context.set_horizontal_scaling(1.2); // Tz
803        context.set_leading(15.0); // TL
804        context.set_text_rise(1.0); // Ts
805        context.set_rendering_mode(TextRenderingMode::Stroke); // Tr
806
807        let ops = context.generate_text_state_operations();
808
809        // Verify all PDF text state operators are present
810        assert!(
811            ops.contains("Tc"),
812            "Character spacing operator (Tc) not found"
813        );
814        assert!(ops.contains("Tw"), "Word spacing operator (Tw) not found");
815        assert!(
816            ops.contains("Tz"),
817            "Horizontal scaling operator (Tz) not found"
818        );
819        assert!(ops.contains("TL"), "Leading operator (TL) not found");
820        assert!(ops.contains("Ts"), "Text rise operator (Ts) not found");
821        assert!(
822            ops.contains("Tr"),
823            "Text rendering mode operator (Tr) not found"
824        );
825    }
826
827    #[test]
828    fn test_text_color_operations() {
829        use crate::Color;
830
831        let mut context = TextContext::new();
832
833        // Test RGB fill color
834        context.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
835        context.apply_text_state_parameters();
836
837        let ops = context.operations();
838        assert!(
839            ops.contains("1.000 0.000 0.000 rg"),
840            "RGB fill color operator (rg) not found in: {ops}"
841        );
842
843        // Clear and test RGB stroke color
844        context.clear();
845        context.set_stroke_color(Color::rgb(0.0, 1.0, 0.0));
846        context.apply_text_state_parameters();
847
848        let ops = context.operations();
849        assert!(
850            ops.contains("0.000 1.000 0.000 RG"),
851            "RGB stroke color operator (RG) not found in: {ops}"
852        );
853
854        // Clear and test grayscale fill color
855        context.clear();
856        context.set_fill_color(Color::gray(0.5));
857        context.apply_text_state_parameters();
858
859        let ops = context.operations();
860        assert!(
861            ops.contains("0.500 g"),
862            "Gray fill color operator (g) not found in: {ops}"
863        );
864
865        // Clear and test CMYK stroke color
866        context.clear();
867        context.set_stroke_color(Color::cmyk(0.2, 0.3, 0.4, 0.1));
868        context.apply_text_state_parameters();
869
870        let ops = context.operations();
871        assert!(
872            ops.contains("0.200 0.300 0.400 0.100 K"),
873            "CMYK stroke color operator (K) not found in: {ops}"
874        );
875
876        // Test both fill and stroke colors together
877        context.clear();
878        context.set_fill_color(Color::rgb(1.0, 0.0, 0.0));
879        context.set_stroke_color(Color::rgb(0.0, 0.0, 1.0));
880        context.apply_text_state_parameters();
881
882        let ops = context.operations();
883        assert!(
884            ops.contains("1.000 0.000 0.000 rg") && ops.contains("0.000 0.000 1.000 RG"),
885            "Both fill and stroke colors not found in: {ops}"
886        );
887    }
888
889    // Issue #97: Test used_characters tracking
890    #[test]
891    fn test_used_characters_tracking_ascii() {
892        let mut context = TextContext::new();
893        context.write("Hello").unwrap();
894
895        let chars = context.get_used_characters();
896        assert!(chars.is_some());
897        let chars = chars.unwrap();
898        assert!(chars.contains(&'H'));
899        assert!(chars.contains(&'e'));
900        assert!(chars.contains(&'l'));
901        assert!(chars.contains(&'o'));
902        assert_eq!(chars.len(), 4); // H, e, l, o (l appears twice but HashSet dedupes)
903    }
904
905    #[test]
906    fn test_used_characters_tracking_cjk() {
907        let mut context = TextContext::new();
908        context.set_font(Font::Custom("NotoSansCJK".to_string()), 12.0);
909        context.write("中文测试").unwrap();
910
911        let chars = context.get_used_characters();
912        assert!(chars.is_some());
913        let chars = chars.unwrap();
914        assert!(chars.contains(&'中'));
915        assert!(chars.contains(&'文'));
916        assert!(chars.contains(&'测'));
917        assert!(chars.contains(&'试'));
918        assert_eq!(chars.len(), 4);
919    }
920
921    #[test]
922    fn test_used_characters_empty_initially() {
923        let context = TextContext::new();
924        assert!(context.get_used_characters().is_none());
925    }
926
927    #[test]
928    fn test_used_characters_multiple_writes() {
929        let mut context = TextContext::new();
930        context.write("AB").unwrap();
931        context.write("CD").unwrap();
932
933        let chars = context.get_used_characters();
934        assert!(chars.is_some());
935        let chars = chars.unwrap();
936        assert!(chars.contains(&'A'));
937        assert!(chars.contains(&'B'));
938        assert!(chars.contains(&'C'));
939        assert!(chars.contains(&'D'));
940        assert_eq!(chars.len(), 4);
941    }
942
943    /// RED for Phase 2 of the v2.7.0 IR refactor: with the legacy `String`
944    /// emission, `set_character_spacing(f64::NAN)` propagates `NaN` into a
945    /// `Tc` operator, which is invalid per ISO 32000-1 §7.3.3. Once the
946    /// migration routes Tc through `serialize_ops`, `finite_or_zero`
947    /// clamps non-finite values to `0.0` and the assertion below passes.
948    #[test]
949    fn nan_char_spacing_sanitised_at_emission() {
950        let mut ctx = TextContext::new();
951        ctx.set_character_spacing(f64::NAN);
952        ctx.write("hi").unwrap();
953        let ops = ctx.operations();
954        assert!(
955            ops.contains("0.00 Tc\n"),
956            "NaN char spacing must emit `0.00 Tc`, got: {ops:?}"
957        );
958        assert!(
959            !ops.contains("NaN") && !ops.contains("inf"),
960            "non-finite tokens must not appear in any Tc/Tw/Tz/TL/Ts emission, got: {ops:?}"
961        );
962    }
963
964    #[test]
965    fn pos_inf_word_spacing_sanitised_at_emission() {
966        let mut ctx = TextContext::new();
967        ctx.set_word_spacing(f64::INFINITY);
968        ctx.write("hi").unwrap();
969        let ops = ctx.operations();
970        assert!(
971            ops.contains("0.00 Tw\n"),
972            "+inf word spacing must emit `0.00 Tw`, got: {ops:?}"
973        );
974        assert!(
975            !ops.contains("inf"),
976            "`inf` must not appear in Tw output, got: {ops:?}"
977        );
978    }
979
980    #[test]
981    fn nan_horizontal_scaling_sanitised_at_emission() {
982        let mut ctx = TextContext::new();
983        ctx.set_horizontal_scaling(f64::NAN);
984        ctx.write("hi").unwrap();
985        let ops = ctx.operations();
986        assert!(
987            ops.contains("0.00 Tz\n"),
988            "NaN horizontal scaling must emit `0.00 Tz`, got: {ops:?}"
989        );
990    }
991
992    #[test]
993    fn nan_leading_and_text_rise_sanitised_at_emission() {
994        let mut ctx = TextContext::new();
995        ctx.set_leading(f64::NEG_INFINITY);
996        ctx.set_text_rise(f64::NAN);
997        ctx.write("hi").unwrap();
998        let ops = ctx.operations();
999        assert!(
1000            ops.contains("0.00 TL\n"),
1001            "-inf leading must emit `0.00 TL`, got: {ops:?}"
1002        );
1003        assert!(
1004            ops.contains("0.00 Ts\n"),
1005            "NaN text rise must emit `0.00 Ts`, got: {ops:?}"
1006        );
1007    }
1008
1009    #[test]
1010    fn test_text_context_threads_metrics_store() {
1011        use crate::text::metrics::{FontMetrics, FontMetricsStore};
1012        let store = FontMetricsStore::new();
1013        let ctx = TextContext::with_metrics_store(Some(store.clone()));
1014        // The store handle round-trips.
1015        assert!(ctx.font_metrics_store_for_test().is_some());
1016        // Cloning shares state.
1017        store.register("X", FontMetrics::new(400));
1018        assert_eq!(
1019            ctx.font_metrics_store_for_test().unwrap().len(),
1020            1,
1021            "TextContext must hold a clone that shares the underlying registry"
1022        );
1023    }
1024}