Skip to main content

pdfplumber_parse/
text_renderer.rs

1//! Text rendering operators (Tj, TJ, ', ") for the content stream interpreter.
2//!
3//! Processes text-showing operators that produce character glyph output,
4//! advancing the text position within the [`TextState`].
5//!
6//! At this stage, character codes are raw byte values — Unicode mapping
7//! (US-012) and font metrics for accurate widths (US-013) come later.
8
9use crate::text_state::TextState;
10
/// One glyph occurrence produced by a text-showing operator.
///
/// A `RawChar` records the undecoded character code together with the
/// advance that was applied for it and the text matrix that was in effect
/// just before that advance. No Unicode mapping has been applied yet; later
/// stages are expected to refine this record.
#[derive(Debug, Clone, PartialEq)]
pub struct RawChar {
    /// Character code exactly as decoded from the PDF string operand.
    pub char_code: u32,
    /// Text-space advance applied after this character.
    ///
    /// Computed by the show operators as
    /// `((w0 / 1000) * font_size + char_spacing + word_spacing_if_space) * h_scaling`.
    pub displacement: f64,
    /// The six text-matrix components captured before the advance was applied.
    pub text_matrix: [f64; 6],
}
27
/// A single entry of the array operand passed to the `TJ` operator.
///
/// The array interleaves byte strings, which are shown, with numbers,
/// which nudge the text position between strings.
#[derive(Debug, Clone, PartialEq)]
pub enum TjElement {
    /// Bytes to be shown; how they are grouped into character codes is
    /// decided by the function that renders them.
    String(Vec<u8>),
    /// Position adjustment expressed in thousandths of a text-space unit.
    ///
    /// The value is subtracted from the advance: positive numbers pull the
    /// following glyph back (tighter), negative numbers push it forward (looser).
    Adjustment(f64),
}
40
41/// `Tj` operator: show a string.
42///
43/// Each byte in `string_bytes` is treated as a single character code.
44/// For each byte:
45/// 1. Snapshot the current text matrix as the character's render position
46/// 2. Look up the glyph width via `get_width(char_code)` (in glyph space, 1/1000 units)
47/// 3. Calculate text-space displacement and advance the text position
48///
49/// Returns a [`Vec<RawChar>`] with one entry per byte.
50pub fn show_string(
51    text_state: &mut TextState,
52    string_bytes: &[u8],
53    get_width: &dyn Fn(u32) -> f64,
54) -> Vec<RawChar> {
55    let mut chars = Vec::with_capacity(string_bytes.len());
56
57    for &byte in string_bytes {
58        let char_code = u32::from(byte);
59
60        // Snapshot the text matrix before advancing
61        let text_matrix = text_state.text_matrix_array();
62
63        // Calculate displacement in text space
64        let w0 = get_width(char_code);
65        let font_size = text_state.font_size;
66        let char_spacing = text_state.char_spacing;
67        let word_spacing = if char_code == 32 {
68            text_state.word_spacing
69        } else {
70            0.0
71        };
72        let h_scaling = text_state.h_scaling_normalized();
73
74        let tx = ((w0 / 1000.0) * font_size + char_spacing + word_spacing) * h_scaling;
75
76        chars.push(RawChar {
77            char_code,
78            displacement: tx,
79            text_matrix,
80        });
81
82        // Advance text position
83        text_state.advance_text_position(tx);
84    }
85
86    chars
87}
88
89/// `TJ` operator: show strings with positioning adjustments.
90///
91/// Processes an array of [`TjElement`]s. Strings are rendered like `Tj`;
92/// numeric adjustments shift the text position (in thousandths of a unit
93/// of text space). Positive adjustments move left, negative move right.
94pub fn show_string_with_positioning(
95    text_state: &mut TextState,
96    elements: &[TjElement],
97    get_width: &dyn Fn(u32) -> f64,
98) -> Vec<RawChar> {
99    let mut chars = Vec::new();
100
101    for element in elements {
102        match element {
103            TjElement::String(bytes) => {
104                let mut sub_chars = show_string(text_state, bytes, get_width);
105                chars.append(&mut sub_chars);
106            }
107            TjElement::Adjustment(adj) => {
108                // PDF spec: positive adjustment moves left, negative moves right
109                // tx = -(adj / 1000) * font_size * h_scaling
110                let font_size = text_state.font_size;
111                let h_scaling = text_state.h_scaling_normalized();
112                let tx = -(adj / 1000.0) * font_size * h_scaling;
113                text_state.advance_text_position(tx);
114            }
115        }
116    }
117
118    chars
119}
120
121/// `Tj` operator for CID fonts: show a string using 2-byte character codes.
122///
123/// For CID fonts (Type0/composite), each character code is formed from two
124/// consecutive bytes in big-endian order. If the byte string has an odd length,
125/// the last byte is treated as a single-byte code.
126pub fn show_string_cid(
127    text_state: &mut TextState,
128    string_bytes: &[u8],
129    get_width: &dyn Fn(u32) -> f64,
130) -> Vec<RawChar> {
131    let mut chars = Vec::with_capacity(string_bytes.len() / 2);
132    let mut i = 0;
133
134    while i < string_bytes.len() {
135        let char_code = if i + 1 < string_bytes.len() {
136            let code = u32::from(string_bytes[i]) << 8 | u32::from(string_bytes[i + 1]);
137            i += 2;
138            code
139        } else {
140            let code = u32::from(string_bytes[i]);
141            i += 1;
142            code
143        };
144
145        // Snapshot the text matrix before advancing
146        let text_matrix = text_state.text_matrix_array();
147
148        // Calculate displacement in text space
149        let w0 = get_width(char_code);
150        let font_size = text_state.font_size;
151        let char_spacing = text_state.char_spacing;
152        let word_spacing = if char_code == 32 {
153            text_state.word_spacing
154        } else {
155            0.0
156        };
157        let h_scaling = text_state.h_scaling_normalized();
158
159        let tx = ((w0 / 1000.0) * font_size + char_spacing + word_spacing) * h_scaling;
160
161        chars.push(RawChar {
162            char_code,
163            displacement: tx,
164            text_matrix,
165        });
166
167        // Advance text position
168        text_state.advance_text_position(tx);
169    }
170
171    chars
172}
173
174/// `TJ` operator with CID mode: show strings with positioning adjustments.
175///
176/// Like [`show_string_with_positioning`] but when `cid_mode` is true, string
177/// bytes are decoded as 2-byte character codes (for CID/Type0 fonts).
178pub fn show_string_with_positioning_mode(
179    text_state: &mut TextState,
180    elements: &[TjElement],
181    get_width: &dyn Fn(u32) -> f64,
182    cid_mode: bool,
183) -> Vec<RawChar> {
184    let mut chars = Vec::new();
185
186    for element in elements {
187        match element {
188            TjElement::String(bytes) => {
189                let mut sub_chars = if cid_mode {
190                    show_string_cid(text_state, bytes, get_width)
191                } else {
192                    show_string(text_state, bytes, get_width)
193                };
194                chars.append(&mut sub_chars);
195            }
196            TjElement::Adjustment(adj) => {
197                // PDF spec: positive adjustment moves left, negative moves right
198                let font_size = text_state.font_size;
199                let h_scaling = text_state.h_scaling_normalized();
200                let tx = -(adj / 1000.0) * font_size * h_scaling;
201                text_state.advance_text_position(tx);
202            }
203        }
204    }
205
206    chars
207}
208
209/// `'` (single quote) operator: move to next line and show a string.
210///
211/// Equivalent to `T*` followed by `Tj`.
212pub fn quote_show_string(
213    text_state: &mut TextState,
214    string_bytes: &[u8],
215    get_width: &dyn Fn(u32) -> f64,
216) -> Vec<RawChar> {
217    text_state.move_to_next_line(); // T*
218    show_string(text_state, string_bytes, get_width) // Tj
219}
220
221/// `"` (double quote) operator: set spacing, move to next line, and show a string.
222///
223/// Equivalent to: `aw Tw`, `ac Tc`, then `string '`.
224pub fn double_quote_show_string(
225    text_state: &mut TextState,
226    word_spacing: f64,
227    char_spacing: f64,
228    string_bytes: &[u8],
229    get_width: &dyn Fn(u32) -> f64,
230) -> Vec<RawChar> {
231    text_state.set_word_spacing(word_spacing); // aw Tw
232    text_state.set_char_spacing(char_spacing); // ac Tc
233    quote_show_string(text_state, string_bytes, get_width) // string '
234}
235
236#[cfg(test)]
237mod tests {
238    use super::*;
239
240    /// Constant width function: returns 600 glyph units for all characters.
241    /// This simulates a monospaced font where each glyph is 0.6 em wide.
242    fn constant_width(_char_code: u32) -> f64 {
243        600.0
244    }
245
246    /// Variable width function for testing different widths per character.
247    fn variable_width(char_code: u32) -> f64 {
248        match char_code {
249            32 => 250.0, // space
250            65 => 722.0, // A
251            66 => 667.0, // B
252            _ => 500.0,  // default
253        }
254    }
255
256    fn assert_approx(actual: f64, expected: f64) {
257        assert!(
258            (actual - expected).abs() < 1e-6,
259            "expected {expected}, got {actual}"
260        );
261    }
262
263    // --- RawChar construction ---
264
265    #[test]
266    fn raw_char_construction() {
267        let rc = RawChar {
268            char_code: 65,
269            displacement: 7.2,
270            text_matrix: [1.0, 0.0, 0.0, 1.0, 72.0, 700.0],
271        };
272        assert_eq!(rc.char_code, 65);
273        assert_approx(rc.displacement, 7.2);
274        assert_eq!(rc.text_matrix, [1.0, 0.0, 0.0, 1.0, 72.0, 700.0]);
275    }
276
277    #[test]
278    fn raw_char_clone() {
279        let rc = RawChar {
280            char_code: 65,
281            displacement: 7.2,
282            text_matrix: [1.0, 0.0, 0.0, 1.0, 72.0, 700.0],
283        };
284        let cloned = rc.clone();
285        assert_eq!(rc, cloned);
286    }
287
288    // --- TjElement ---
289
290    #[test]
291    fn tj_element_string_variant() {
292        let elem = TjElement::String(vec![65, 66, 67]);
293        if let TjElement::String(bytes) = &elem {
294            assert_eq!(bytes, &[65, 66, 67]);
295        } else {
296            panic!("expected String variant");
297        }
298    }
299
300    #[test]
301    fn tj_element_adjustment_variant() {
302        let elem = TjElement::Adjustment(-120.0);
303        if let TjElement::Adjustment(adj) = &elem {
304            assert_approx(*adj, -120.0);
305        } else {
306            panic!("expected Adjustment variant");
307        }
308    }
309
310    // --- Tj operator: show_string ---
311
312    #[test]
313    fn tj_empty_string() {
314        let mut ts = TextState::new();
315        ts.begin_text();
316        ts.set_font("F1".to_string(), 12.0);
317
318        let chars = show_string(&mut ts, &[], &constant_width);
319        assert!(chars.is_empty());
320    }
321
322    #[test]
323    fn tj_single_char() {
324        let mut ts = TextState::new();
325        ts.begin_text();
326        ts.set_font("F1".to_string(), 12.0);
327        ts.move_text_position(72.0, 700.0);
328
329        let chars = show_string(&mut ts, &[65], &constant_width); // 'A'
330        assert_eq!(chars.len(), 1);
331        assert_eq!(chars[0].char_code, 65);
332        // Text matrix captured at render position
333        assert_eq!(chars[0].text_matrix, [1.0, 0.0, 0.0, 1.0, 72.0, 700.0]);
334        // displacement = (600/1000 * 12 + 0 + 0) * 1.0 = 7.2
335        assert_approx(chars[0].displacement, 7.2);
336    }
337
338    #[test]
339    fn tj_multiple_chars() {
340        let mut ts = TextState::new();
341        ts.begin_text();
342        ts.set_font("F1".to_string(), 10.0);
343        ts.move_text_position(100.0, 500.0);
344
345        // "AB" = bytes [65, 66]
346        let chars = show_string(&mut ts, &[65, 66], &constant_width);
347        assert_eq!(chars.len(), 2);
348
349        // First char at position (100, 500)
350        assert_eq!(chars[0].char_code, 65);
351        assert_approx(chars[0].text_matrix[4], 100.0);
352
353        // Second char: displaced by (600/1000 * 10) * 1.0 = 6.0
354        assert_eq!(chars[1].char_code, 66);
355        assert_approx(chars[1].text_matrix[4], 106.0);
356    }
357
358    #[test]
359    fn tj_with_char_spacing() {
360        let mut ts = TextState::new();
361        ts.begin_text();
362        ts.set_font("F1".to_string(), 10.0);
363        ts.set_char_spacing(2.0);
364        ts.move_text_position(100.0, 500.0);
365
366        let chars = show_string(&mut ts, &[65, 66], &constant_width);
367
368        // First char displacement: (600/1000 * 10 + 2.0) * 1.0 = 8.0
369        assert_approx(chars[0].displacement, 8.0);
370        // Second char starts at 100 + 8 = 108
371        assert_approx(chars[1].text_matrix[4], 108.0);
372    }
373
374    #[test]
375    fn tj_word_spacing_applied_only_for_space() {
376        let mut ts = TextState::new();
377        ts.begin_text();
378        ts.set_font("F1".to_string(), 10.0);
379        ts.set_word_spacing(5.0);
380        ts.move_text_position(100.0, 500.0);
381
382        // Space (32) gets word spacing; 'A' (65) does not
383        let chars = show_string(&mut ts, &[32, 65], &constant_width);
384
385        // Space: (600/1000 * 10 + 0 + 5.0) * 1.0 = 11.0
386        assert_approx(chars[0].displacement, 11.0);
387        assert_eq!(chars[0].char_code, 32);
388
389        // 'A': (600/1000 * 10 + 0 + 0) * 1.0 = 6.0
390        assert_approx(chars[1].displacement, 6.0);
391        assert_eq!(chars[1].char_code, 65);
392    }
393
394    #[test]
395    fn tj_with_h_scaling() {
396        let mut ts = TextState::new();
397        ts.begin_text();
398        ts.set_font("F1".to_string(), 10.0);
399        ts.set_h_scaling(50.0); // 50%
400        ts.move_text_position(100.0, 500.0);
401
402        let chars = show_string(&mut ts, &[65], &constant_width);
403
404        // displacement: (600/1000 * 10 + 0) * 0.5 = 3.0
405        assert_approx(chars[0].displacement, 3.0);
406    }
407
408    #[test]
409    fn tj_combined_spacing_and_scaling() {
410        let mut ts = TextState::new();
411        ts.begin_text();
412        ts.set_font("F1".to_string(), 10.0);
413        ts.set_char_spacing(1.0);
414        ts.set_word_spacing(3.0);
415        ts.set_h_scaling(200.0); // 200%
416        ts.move_text_position(0.0, 0.0);
417
418        // Space char: (600/1000 * 10 + 1.0 + 3.0) * 2.0 = (6 + 1 + 3) * 2 = 20.0
419        let chars = show_string(&mut ts, &[32], &constant_width);
420        assert_approx(chars[0].displacement, 20.0);
421
422        // Non-space: (600/1000 * 10 + 1.0 + 0) * 2.0 = (6 + 1) * 2 = 14.0
423        let chars = show_string(&mut ts, &[65], &constant_width);
424        assert_approx(chars[0].displacement, 14.0);
425    }
426
427    #[test]
428    fn tj_advances_text_position() {
429        let mut ts = TextState::new();
430        ts.begin_text();
431        ts.set_font("F1".to_string(), 10.0);
432        ts.move_text_position(100.0, 500.0);
433
434        show_string(&mut ts, &[65, 66], &constant_width);
435
436        // After 2 chars: 100 + 6.0 + 6.0 = 112.0
437        assert_approx(ts.text_matrix().e, 112.0);
438    }
439
440    #[test]
441    fn tj_does_not_change_line_matrix() {
442        let mut ts = TextState::new();
443        ts.begin_text();
444        ts.set_font("F1".to_string(), 10.0);
445        ts.move_text_position(100.0, 500.0);
446
447        let line_matrix_before = *ts.line_matrix();
448        show_string(&mut ts, &[65, 66, 67], &constant_width);
449
450        // Line matrix should not change during Tj
451        assert_eq!(*ts.line_matrix(), line_matrix_before);
452    }
453
454    #[test]
455    fn tj_with_variable_widths() {
456        let mut ts = TextState::new();
457        ts.begin_text();
458        ts.set_font("F1".to_string(), 10.0);
459        ts.move_text_position(0.0, 0.0);
460
461        // 'A' (722), space (250), 'B' (667)
462        let chars = show_string(&mut ts, &[65, 32, 66], &variable_width);
463
464        assert_eq!(chars.len(), 3);
465        // A: (722/1000 * 10) * 1.0 = 7.22
466        assert_approx(chars[0].displacement, 7.22);
467        // space: (250/1000 * 10) * 1.0 = 2.5
468        assert_approx(chars[1].displacement, 2.5);
469        // B: (667/1000 * 10) * 1.0 = 6.67
470        assert_approx(chars[2].displacement, 6.67);
471
472        // Verify positions
473        assert_approx(chars[0].text_matrix[4], 0.0);
474        assert_approx(chars[1].text_matrix[4], 7.22);
475        assert_approx(chars[2].text_matrix[4], 9.72); // 7.22 + 2.5
476    }
477
478    #[test]
479    fn tj_with_scaled_text_matrix() {
480        let mut ts = TextState::new();
481        ts.begin_text();
482        ts.set_font("F1".to_string(), 1.0); // font_size = 1 (scaling via Tm)
483        // Text matrix with 12x scaling (simulates 12pt font via matrix)
484        ts.set_text_matrix(12.0, 0.0, 0.0, 12.0, 72.0, 700.0);
485
486        let chars = show_string(&mut ts, &[65], &constant_width);
487
488        assert_eq!(chars[0].text_matrix, [12.0, 0.0, 0.0, 12.0, 72.0, 700.0]);
489        // displacement = (600/1000 * 1.0) * 1.0 = 0.6
490        assert_approx(chars[0].displacement, 0.6);
491        // advance_text_position(0.6) pre-multiplies [1 0 0 1 0.6 0] × [12 0 0 12 72 700]
492        // new_e = 0.6 * 12 + 72 = 79.2
493        assert_approx(ts.text_matrix().e, 79.2);
494    }
495
496    // --- TJ operator: show_string_with_positioning ---
497
498    #[test]
499    fn tj_array_empty() {
500        let mut ts = TextState::new();
501        ts.begin_text();
502        ts.set_font("F1".to_string(), 10.0);
503
504        let chars = show_string_with_positioning(&mut ts, &[], &constant_width);
505        assert!(chars.is_empty());
506    }
507
508    #[test]
509    fn tj_array_strings_only() {
510        let mut ts = TextState::new();
511        ts.begin_text();
512        ts.set_font("F1".to_string(), 10.0);
513        ts.move_text_position(100.0, 500.0);
514
515        let elements = vec![
516            TjElement::String(vec![65]), // "A"
517            TjElement::String(vec![66]), // "B"
518        ];
519        let chars = show_string_with_positioning(&mut ts, &elements, &constant_width);
520
521        assert_eq!(chars.len(), 2);
522        assert_eq!(chars[0].char_code, 65);
523        assert_eq!(chars[1].char_code, 66);
524        // Same as two consecutive Tj calls
525        assert_approx(chars[0].text_matrix[4], 100.0);
526        assert_approx(chars[1].text_matrix[4], 106.0);
527    }
528
529    #[test]
530    fn tj_array_with_negative_adjustment_adds_space() {
531        let mut ts = TextState::new();
532        ts.begin_text();
533        ts.set_font("F1".to_string(), 10.0);
534        ts.move_text_position(100.0, 500.0);
535
536        // [(A) -200 (B)]
537        // -200 means move right: tx = -(-200)/1000 * 10 * 1.0 = +2.0
538        let elements = vec![
539            TjElement::String(vec![65]),
540            TjElement::Adjustment(-200.0),
541            TjElement::String(vec![66]),
542        ];
543        let chars = show_string_with_positioning(&mut ts, &elements, &constant_width);
544
545        assert_eq!(chars.len(), 2);
546        assert_approx(chars[0].text_matrix[4], 100.0);
547        // A advance (6.0) + adjustment (+2.0) = 8.0 offset
548        assert_approx(chars[1].text_matrix[4], 108.0);
549    }
550
551    #[test]
552    fn tj_array_with_positive_adjustment_tightens() {
553        let mut ts = TextState::new();
554        ts.begin_text();
555        ts.set_font("F1".to_string(), 10.0);
556        ts.move_text_position(100.0, 500.0);
557
558        // [(A) 200 (B)] — positive adjustment moves LEFT (kerning/tightening)
559        let elements = vec![
560            TjElement::String(vec![65]),
561            TjElement::Adjustment(200.0),
562            TjElement::String(vec![66]),
563        ];
564        let chars = show_string_with_positioning(&mut ts, &elements, &constant_width);
565
566        assert_eq!(chars.len(), 2);
567        // A at 100, advance 6.0, then adjustment -(200/1000)*10 = -2.0
568        // B at 100 + 6.0 - 2.0 = 104.0
569        assert_approx(chars[1].text_matrix[4], 104.0);
570    }
571
572    #[test]
573    fn tj_array_adjustment_only() {
574        let mut ts = TextState::new();
575        ts.begin_text();
576        ts.set_font("F1".to_string(), 10.0);
577        ts.move_text_position(100.0, 500.0);
578
579        // TJ array with only adjustments (no characters)
580        let elements = vec![
581            TjElement::Adjustment(-500.0), // move right by 5.0
582        ];
583        let chars = show_string_with_positioning(&mut ts, &elements, &constant_width);
584
585        assert!(chars.is_empty());
586        // Position should be advanced by -(-500)/1000 * 10 = +5.0
587        assert_approx(ts.text_matrix().e, 105.0);
588    }
589
590    #[test]
591    fn tj_array_multi_byte_strings() {
592        let mut ts = TextState::new();
593        ts.begin_text();
594        ts.set_font("F1".to_string(), 10.0);
595        ts.move_text_position(0.0, 0.0);
596
597        // [(AB) -100 (CD)]
598        let elements = vec![
599            TjElement::String(vec![65, 66]),
600            TjElement::Adjustment(-100.0),
601            TjElement::String(vec![67, 68]),
602        ];
603        let chars = show_string_with_positioning(&mut ts, &elements, &constant_width);
604
605        assert_eq!(chars.len(), 4);
606        assert_eq!(chars[0].char_code, 65); // A
607        assert_eq!(chars[1].char_code, 66); // B
608        assert_eq!(chars[2].char_code, 67); // C
609        assert_eq!(chars[3].char_code, 68); // D
610
611        // A at 0, B at 6, adjustment +1.0, C at 13.0, D at 19.0
612        assert_approx(chars[0].text_matrix[4], 0.0);
613        assert_approx(chars[1].text_matrix[4], 6.0);
614        assert_approx(chars[2].text_matrix[4], 13.0); // 6 + 6 + 1
615        assert_approx(chars[3].text_matrix[4], 19.0); // 13 + 6
616    }
617
618    #[test]
619    fn tj_array_adjustment_with_h_scaling() {
620        let mut ts = TextState::new();
621        ts.begin_text();
622        ts.set_font("F1".to_string(), 10.0);
623        ts.set_h_scaling(50.0); // 50%
624        ts.move_text_position(100.0, 500.0);
625
626        // [(A) -1000 (B)] — adjustment of -1000 thousandths
627        let elements = vec![
628            TjElement::String(vec![65]),
629            TjElement::Adjustment(-1000.0),
630            TjElement::String(vec![66]),
631        ];
632        let chars = show_string_with_positioning(&mut ts, &elements, &constant_width);
633
634        // A displacement: (600/1000 * 10) * 0.5 = 3.0
635        assert_approx(chars[0].displacement, 3.0);
636        // Adjustment: -(-1000/1000) * 10 * 0.5 = 5.0
637        // B at: 100 + 3.0 + 5.0 = 108.0
638        assert_approx(chars[1].text_matrix[4], 108.0);
639    }
640
641    // --- ' operator: quote_show_string ---
642
643    #[test]
644    fn quote_moves_to_next_line_then_shows() {
645        let mut ts = TextState::new();
646        ts.begin_text();
647        ts.set_font("F1".to_string(), 10.0);
648        ts.set_leading(14.0);
649        ts.move_text_position(72.0, 700.0);
650
651        let chars = quote_show_string(&mut ts, &[65], &constant_width);
652
653        assert_eq!(chars.len(), 1);
654        assert_eq!(chars[0].char_code, 65);
655        // T* moved to (72, 700 - 14) = (72, 686)
656        assert_approx(chars[0].text_matrix[4], 72.0);
657        assert_approx(chars[0].text_matrix[5], 686.0);
658    }
659
660    #[test]
661    fn quote_empty_string() {
662        let mut ts = TextState::new();
663        ts.begin_text();
664        ts.set_font("F1".to_string(), 10.0);
665        ts.set_leading(14.0);
666        ts.move_text_position(72.0, 700.0);
667
668        let chars = quote_show_string(&mut ts, &[], &constant_width);
669
670        assert!(chars.is_empty());
671        // T* should still have moved the position
672        assert_approx(ts.text_matrix().e, 72.0);
673        assert_approx(ts.text_matrix().f, 686.0);
674    }
675
676    #[test]
677    fn quote_updates_line_matrix() {
678        let mut ts = TextState::new();
679        ts.begin_text();
680        ts.set_font("F1".to_string(), 10.0);
681        ts.set_leading(14.0);
682        ts.move_text_position(72.0, 700.0);
683
684        quote_show_string(&mut ts, &[65], &constant_width);
685
686        // Line matrix should reflect the T* move
687        assert_approx(ts.line_matrix().e, 72.0);
688        assert_approx(ts.line_matrix().f, 686.0);
689    }
690
691    // --- " operator: double_quote_show_string ---
692
693    #[test]
694    fn double_quote_sets_spacing_then_shows() {
695        let mut ts = TextState::new();
696        ts.begin_text();
697        ts.set_font("F1".to_string(), 10.0);
698        ts.set_leading(14.0);
699        ts.move_text_position(72.0, 700.0);
700
701        let chars = double_quote_show_string(&mut ts, 3.0, 1.0, &[65], &constant_width);
702
703        assert_eq!(chars.len(), 1);
704        // Word spacing and char spacing should be set
705        assert_approx(ts.word_spacing, 3.0);
706        assert_approx(ts.char_spacing, 1.0);
707        // T* moved to (72, 686), then showed 'A'
708        assert_approx(chars[0].text_matrix[4], 72.0);
709        assert_approx(chars[0].text_matrix[5], 686.0);
710        // displacement includes the new char_spacing: (600/1000 * 10 + 1.0) * 1.0 = 7.0
711        assert_approx(chars[0].displacement, 7.0);
712    }
713
714    #[test]
715    fn double_quote_word_spacing_applies_to_space() {
716        let mut ts = TextState::new();
717        ts.begin_text();
718        ts.set_font("F1".to_string(), 10.0);
719        ts.set_leading(14.0);
720        ts.move_text_position(72.0, 700.0);
721
722        // Show a space character — word spacing should apply
723        let chars = double_quote_show_string(&mut ts, 5.0, 0.0, &[32], &constant_width);
724
725        // displacement: (600/1000 * 10 + 0 + 5.0) * 1.0 = 11.0
726        assert_approx(chars[0].displacement, 11.0);
727    }
728
729    // --- Position tracking across multiple operators ---
730
731    #[test]
732    fn position_tracking_across_multiple_tj() {
733        let mut ts = TextState::new();
734        ts.begin_text();
735        ts.set_font("F1".to_string(), 10.0);
736        ts.move_text_position(100.0, 500.0);
737
738        // First Tj: "AB"
739        let _chars1 = show_string(&mut ts, &[65, 66], &constant_width);
740        // Second Tj: "CD"
741        let chars2 = show_string(&mut ts, &[67, 68], &constant_width);
742
743        // After "AB": position = 100 + 6 + 6 = 112
744        // C at 112, D at 118
745        assert_approx(chars2[0].text_matrix[4], 112.0);
746        assert_approx(chars2[1].text_matrix[4], 118.0);
747    }
748
749    #[test]
750    fn position_tracking_tj_then_quote() {
751        let mut ts = TextState::new();
752        ts.begin_text();
753        ts.set_font("F1".to_string(), 10.0);
754        ts.set_leading(14.0);
755        ts.move_text_position(72.0, 700.0);
756
757        // First line: Tj "A"
758        show_string(&mut ts, &[65], &constant_width);
759
760        // Next line via ': "B"
761        let chars = quote_show_string(&mut ts, &[66], &constant_width);
762
763        // T* moves to (72, 686) — x resets to line start
764        assert_approx(chars[0].text_matrix[4], 72.0);
765        assert_approx(chars[0].text_matrix[5], 686.0);
766    }
767
768    #[test]
769    fn position_tracking_multiple_quote_lines() {
770        let mut ts = TextState::new();
771        ts.begin_text();
772        ts.set_font("F1".to_string(), 10.0);
773        ts.set_leading(12.0);
774        ts.move_text_position(72.0, 700.0);
775
776        // Three lines using quote operator
777        let chars1 = quote_show_string(&mut ts, &[65], &constant_width);
778        let chars2 = quote_show_string(&mut ts, &[66], &constant_width);
779        let chars3 = quote_show_string(&mut ts, &[67], &constant_width);
780
781        // Line 1 at y = 700 - 12 = 688
782        assert_approx(chars1[0].text_matrix[5], 688.0);
783        // Line 2 at y = 688 - 12 = 676
784        assert_approx(chars2[0].text_matrix[5], 676.0);
785        // Line 3 at y = 676 - 12 = 664
786        assert_approx(chars3[0].text_matrix[5], 664.0);
787    }
788
789    // --- Realistic sequences ---
790
791    #[test]
792    fn realistic_text_block_sequence() {
793        let mut ts = TextState::new();
794
795        // Setup
796        ts.set_font("Helvetica".to_string(), 12.0);
797        ts.set_leading(14.0);
798
799        // BT
800        ts.begin_text();
801
802        // 72 700 Td
803        ts.move_text_position(72.0, 700.0);
804
805        // (Hello) Tj
806        let hello = show_string(&mut ts, b"Hello", &constant_width);
807        assert_eq!(hello.len(), 5);
808        assert_eq!(hello[0].char_code, b'H' as u32);
809        assert_eq!(hello[4].char_code, b'o' as u32);
810        assert_approx(hello[0].text_matrix[4], 72.0);
811
812        // T* — move to next line
813        ts.move_to_next_line();
814
815        // (World) Tj
816        let world = show_string(&mut ts, b"World", &constant_width);
817        assert_approx(world[0].text_matrix[4], 72.0);
818        assert_approx(world[0].text_matrix[5], 686.0); // 700 - 14
819
820        // ET
821        ts.end_text();
822    }
823
824    #[test]
825    fn realistic_tj_array_kerned_text() {
826        let mut ts = TextState::new();
827        ts.begin_text();
828        ts.set_font("Times-Roman".to_string(), 12.0);
829        ts.move_text_position(72.0, 700.0);
830
831        // [(T) 80 (o) -15 (da) 10 (y)] — typical kerned "Today"
832        let elements = vec![
833            TjElement::String(vec![b'T']),
834            TjElement::Adjustment(80.0), // tighten To pair
835            TjElement::String(vec![b'o']),
836            TjElement::Adjustment(-15.0), // loosen od pair
837            TjElement::String(vec![b'd', b'a']),
838            TjElement::Adjustment(10.0), // tighten ay pair
839            TjElement::String(vec![b'y']),
840        ];
841
842        let chars = show_string_with_positioning(&mut ts, &elements, &constant_width);
843
844        assert_eq!(chars.len(), 5);
845        assert_eq!(chars[0].char_code, b'T' as u32);
846        assert_eq!(chars[1].char_code, b'o' as u32);
847        assert_eq!(chars[2].char_code, b'd' as u32);
848        assert_eq!(chars[3].char_code, b'a' as u32);
849        assert_eq!(chars[4].char_code, b'y' as u32);
850
851        // T at 72.0
852        assert_approx(chars[0].text_matrix[4], 72.0);
853        // After T (7.2) + adjustment -(80/1000)*12 = -0.96
854        // o at 72 + 7.2 - 0.96 = 78.24
855        assert_approx(chars[1].text_matrix[4], 78.24);
856    }
857
858    #[test]
859    fn zero_width_font_produces_zero_displacement() {
860        let mut ts = TextState::new();
861        ts.begin_text();
862        ts.set_font("F1".to_string(), 10.0);
863        ts.move_text_position(100.0, 500.0);
864
865        let zero_width = |_: u32| 0.0;
866        let chars = show_string(&mut ts, &[65, 66], &zero_width);
867
868        assert_eq!(chars.len(), 2);
869        assert_approx(chars[0].displacement, 0.0);
870        assert_approx(chars[1].displacement, 0.0);
871        // Both chars at same position since no advancement
872        assert_approx(chars[0].text_matrix[4], 100.0);
873        assert_approx(chars[1].text_matrix[4], 100.0);
874    }
875
876    #[test]
877    fn zero_font_size_produces_only_spacing_displacement() {
878        let mut ts = TextState::new();
879        ts.begin_text();
880        ts.set_font("F1".to_string(), 0.0); // zero font size
881        ts.set_char_spacing(2.0);
882        ts.move_text_position(100.0, 500.0);
883
884        let chars = show_string(&mut ts, &[65], &constant_width);
885
886        // displacement: (600/1000 * 0 + 2.0) * 1.0 = 2.0
887        assert_approx(chars[0].displacement, 2.0);
888    }
889
890    // --- CID font 2-byte character codes: show_string_cid ---
891
892    #[test]
893    fn cid_show_string_two_byte_codes() {
894        let mut ts = TextState::new();
895        ts.begin_text();
896        ts.set_font("F1".to_string(), 12.0);
897        ts.move_text_position(72.0, 700.0);
898
899        // Two 2-byte characters: 0x4E2D (中) and 0x6587 (文)
900        let bytes = vec![0x4E, 0x2D, 0x65, 0x87];
901        let chars = show_string_cid(&mut ts, &bytes, &constant_width);
902
903        assert_eq!(chars.len(), 2);
904        assert_eq!(chars[0].char_code, 0x4E2D);
905        assert_eq!(chars[1].char_code, 0x6587);
906    }
907
908    #[test]
909    fn cid_show_string_empty() {
910        let mut ts = TextState::new();
911        ts.begin_text();
912        ts.set_font("F1".to_string(), 12.0);
913
914        let chars = show_string_cid(&mut ts, &[], &constant_width);
915        assert!(chars.is_empty());
916    }
917
918    #[test]
919    fn cid_show_string_odd_byte_length() {
920        let mut ts = TextState::new();
921        ts.begin_text();
922        ts.set_font("F1".to_string(), 12.0);
923
924        // 3 bytes: first two form 0x4E2D, last byte is 0x41
925        let bytes = vec![0x4E, 0x2D, 0x41];
926        let chars = show_string_cid(&mut ts, &bytes, &constant_width);
927
928        assert_eq!(chars.len(), 2);
929        assert_eq!(chars[0].char_code, 0x4E2D);
930        assert_eq!(chars[1].char_code, 0x41);
931    }
932
933    #[test]
934    fn cid_show_string_single_two_byte_code() {
935        let mut ts = TextState::new();
936        ts.begin_text();
937        ts.set_font("F1".to_string(), 10.0);
938        ts.move_text_position(100.0, 500.0);
939
940        // Single 2-byte character: 0x0041 (should be 'A' in Unicode)
941        let bytes = vec![0x00, 0x41];
942        let chars = show_string_cid(&mut ts, &bytes, &constant_width);
943
944        assert_eq!(chars.len(), 1);
945        assert_eq!(chars[0].char_code, 0x0041);
946        assert_eq!(chars[0].text_matrix, [1.0, 0.0, 0.0, 1.0, 100.0, 500.0]);
947        // displacement = (600/1000 * 10 + 0 + 0) * 1.0 = 6.0
948        assert_approx(chars[0].displacement, 6.0);
949    }
950
951    #[test]
952    fn cid_show_string_advances_position() {
953        let mut ts = TextState::new();
954        ts.begin_text();
955        ts.set_font("F1".to_string(), 10.0);
956        ts.move_text_position(100.0, 500.0);
957
958        // Two 2-byte codes
959        let bytes = vec![0x4E, 0x2D, 0x65, 0x87];
960        let chars = show_string_cid(&mut ts, &bytes, &constant_width);
961
962        assert_eq!(chars.len(), 2);
963        assert_approx(chars[0].text_matrix[4], 100.0);
964        // Second char advanced by 6.0 (600/1000 * 10)
965        assert_approx(chars[1].text_matrix[4], 106.0);
966    }
967
968    #[test]
969    fn cid_show_string_with_variable_widths() {
970        let mut ts = TextState::new();
971        ts.begin_text();
972        ts.set_font("F1".to_string(), 10.0);
973        ts.move_text_position(0.0, 0.0);
974
975        // Custom width function for CID codes
976        let cid_width = |code: u32| -> f64 {
977            match code {
978                0x4E2D => 1000.0, // full-width CJK
979                0x6587 => 1000.0,
980                _ => 500.0,
981            }
982        };
983
984        let bytes = vec![0x4E, 0x2D, 0x65, 0x87];
985        let chars = show_string_cid(&mut ts, &bytes, &cid_width);
986
987        // 0x4E2D width: (1000/1000 * 10) = 10.0
988        assert_approx(chars[0].displacement, 10.0);
989        // 0x6587 at position 10.0
990        assert_approx(chars[1].text_matrix[4], 10.0);
991        assert_approx(chars[1].displacement, 10.0);
992    }
993}