pdfrust/
content.rs

1use core::iter::Iterator;
2
3use crate::{
4    algebra::{Matrix, Number},
5    body::Resources,
6    object::Name,
7    tokenizer::{Token, Tokenizer},
8};
9
10#[derive(Default)]
11struct TextObject {
12    tm: Matrix,  // text matrix
13    tlm: Matrix, // text line matrix
14}
15
16struct Content<'a> {
17    graphic_state: GraphicsState,
18    graphic_state_stack: Vec<GraphicsState>,
19    text_object: TextObject,
20    tokenizer: Tokenizer<'a>,
21}
22
23#[derive(Debug, PartialEq)]
24enum ArrayVal {
25    Text(Vec<u8>),
26    Pos(Number),
27}
28
29type DashArray = Vec<Number>;
30type DashPhase = Number;
31type LineWidth = Number;
32type LineStyle = Number;
33type X = Number;
34type Y = Number;
35type X1 = Number;
36type Y1 = Number;
37type X2 = Number;
38type Y2 = Number;
39type X3 = Number;
40type Y3 = Number;
41type Gray = Number; // gray is a number between 0.0 (black) and 1.0 (white)
42type R = Number;
43type G = Number;
44type B = Number;
45
46#[derive(Debug, PartialEq)]
47enum GraphicsInstruction {
48    // Graphic state operators (page 219)
49    LowerQ,
50    UpperQ,
51    BDC, // Structure content operator (page 850) -> ignored at the moment
52    BMC,
53    EMC,
54    Cm(Number, Number, Number, Number, Number, Number), // Modify current transfo matrix
55    LowerW(LineWidth),                                  // Set the line width in the graphics state
56    UpperJ(LineStyle),            // Set the line cap style in the graphics state
57    LowerD(DashArray, DashPhase), // Set the line dash pattern in the graphics state
58    LowerI(Number),               // Set the flatness tolerance in the graphics state
59    Gs,                           // Set the specified parameters in the graphics state
60    // Path construction operators (page 226)
61    LowerM(X, Y), // Begin a new subpath by moving the current point to coordinates (x, y)
62    LowerL(X, Y), // Append a straight line segment from the current point to the point (x, y). The new current point is (x, y)
63    LowerC(X1, Y1, X2, Y2, X3, Y3), // Append a cubic Bézier curve to the current path
64    LowerH, // Close the current subpath by appending a straight line segment from the current point to the starting point of the subpath
65    Re(Number, Number, Number, Number), // Append a rectangle to the current path as a complete subpath, with lower-left corner (x, y) and dimensions width and height in user space.
66    // Clipping paths operators (page 235)
67    W,
68    WStar,
69    // Path painting operators (page 230)
70    S,
71    LowerF,
72    LowerFStar, // Fill the path, using the even-odd rule to determine the region to fill
73    N,
74    // Color operators (page 287)
75    Cs(String),
76    Sc(Number),
77    UpperG(Gray),               // Set the stroking color space to DeviceGray
78    LowerG(Gray),               // Same as G but used for nonstroking operations
79    RG(Number, Number, Number), // Set the stroking color space to DeviceRGB and set the color intensities
80    Rg(R, G, B),
81    // Text positionning operators (page 406)
82    Td(Number, Number), // move to the start of next line
83    TD(Number, Number), // move to the start of next line
84    Tm(Number, Number, Number, Number, Number, Number), // set text matrix Tm and text line matrix Tlm
85    TStar,
86    // Text state operators (page 398)
87    Tc(Number),         // set char space
88    Tf(String, Number), // set text font
89    Tr(Number),         // set text mode
90    // Text-showing operators (page 407)
91    Tj(Vec<u8>),       // show text string
92    TJ(Vec<ArrayVal>), // show text array
93    // Text object operator (page 405)
94    BeginText,
95    EndText,
96    // XObject operator (page 332)
97    Do(String),
98}
99
100impl<'a> From<Tokenizer<'a>> for Content<'a> {
101    fn from(tokenizer: Tokenizer<'a>) -> Self {
102        Content {
103            graphic_state: GraphicsState::default(),
104            graphic_state_stack: vec![],
105            text_object: TextObject::default(),
106            tokenizer,
107        }
108    }
109}
110
111impl<'a> From<&'a [u8]> for Content<'a> {
112    fn from(bytes: &'a [u8]) -> Self {
113        Content {
114            graphic_state: GraphicsState::default(),
115            graphic_state_stack: vec![],
116            text_object: TextObject::default(),
117            tokenizer: Tokenizer::new(bytes, 0),
118        }
119    }
120}
121
122impl Content<'_> {
123    fn process_q(&mut self) {
124        self.graphic_state_stack.push(self.graphic_state.clone())
125    }
126
127    fn process_upper_q(&mut self) {
128        self.graphic_state = self
129            .graphic_state_stack
130            .pop()
131            .expect("Unable to restore graphic state from empty stack");
132    }
133
134    fn process_cm(&mut self, cm: [Number; 6]) {
135        self.graphic_state.ctm = Matrix::from(cm);
136    }
137
138    fn process_w(&mut self, line_width: Number) {
139        self.graphic_state.line_width = line_width;
140    }
141
142    fn process_upper_j(&mut self, line_cap: Number) {
143        self.graphic_state.line_cap = line_cap;
144    }
145
146    fn process_d(&mut self, _dash_array: DashArray) {}
147
148    fn process_i(&mut self, flatness: Number) {
149        self.graphic_state.flatness = flatness;
150    }
151
152    fn process_gs(&mut self, _dict_name: Name) {}
153
154    fn process_m(&mut self, _x: Number, _y: Number) {}
155
156    fn process_l(&mut self, _x: Number, _y: Number) {}
157
158    fn process_c(
159        &mut self,
160        _x1: Number,
161        _y1: Number,
162        _x2: Number,
163        _y2: Number,
164        _x3: Number,
165        _y3: Number,
166    ) {
167    }
168
169    fn process_re(&mut self, _x: Number, _y: Number, _width: Number, _height: Number) {}
170
171    fn process_bt(&mut self) {
172        self.text_object = TextObject::default();
173    }
174
175    fn process_tc(&mut self, tc: Number) {
176        self.graphic_state.text_state.tc = tc;
177    }
178
179    fn process_td(&mut self, tx: Number, ty: Number) {
180        self.text_object.tlm =
181            Matrix::new(1.0, 0.0, 0.0, 1.0, f32::from(tx), f32::from(ty)) * self.text_object.tlm;
182        self.text_object.tm = self.text_object.tlm;
183    }
184
185    fn process_t_upper_d(&mut self, tx: Number, ty: Number) {
186        self.graphic_state.text_state.tl = -ty.clone();
187        self.process_td(tx, ty);
188    }
189
190    fn process_tr(&mut self, render: Number) {
191        self.graphic_state.text_state.tmode = render;
192    }
193
194    fn process_tf(&mut self, font: String, size: Number) {
195        self.graphic_state.text_state.tf = Some(font);
196        self.graphic_state.text_state.tfs = Some(size);
197    }
198
199    fn process_tm(&mut self, a: Number, b: Number, c: Number, d: Number, e: Number, f: Number) {
200        self.text_object.tm = Matrix::new(
201            f32::from(a.clone()),
202            f32::from(b.clone()),
203            f32::from(c.clone()),
204            f32::from(d.clone()),
205            f32::from(e.clone()),
206            f32::from(f.clone()),
207        );
208        self.text_object.tlm = Matrix::new(
209            f32::from(a),
210            f32::from(b),
211            f32::from(c),
212            f32::from(d),
213            f32::from(e),
214            f32::from(f),
215        );
216    }
217
218    fn process_t_star(&mut self) {
219        self.process_td(Number::Integer(0), self.graphic_state.text_state.tl.clone());
220    }
221}
222
223impl Iterator for Content<'_> {
224    type Item = GraphicsInstruction;
225
226    fn next(&mut self) -> Option<Self::Item> {
227        let mut buf: Vec<Token> = vec![];
228        while let Some(t) = self.tokenizer.next() {
229            match t {
230                Token::LitteralString(_) => buf.push(t),
231                Token::Name(_) => buf.push(t),
232                Token::ArrayBegin => buf.push(t),
233                Token::ArrayEnd => buf.push(t),
234                Token::DictBegin => {
235                    // ignored for now
236                    for t in self.tokenizer.by_ref() {
237                        if t == Token::DictEnd {
238                            break;
239                        }
240                    }
241                }
242                Token::HexString(_) => buf.push(t),
243                Token::Numeric(_) => buf.push(t),
244                Token::String(l) => match l.as_slice() {
245                    b"q" => {
246                        self.process_q();
247                        return Some(GraphicsInstruction::LowerQ);
248                    }
249                    b"Q" => {
250                        self.process_upper_q();
251                        return Some(GraphicsInstruction::UpperQ);
252                    }
253                    b"cm" => {
254                        let a = match &buf[0] {
255                            Token::Numeric(n) => n.clone(),
256                            t => panic!("Operand {t:?} is not allowed with operator re"),
257                        };
258                        let b = match &buf[1] {
259                            Token::Numeric(n) => n.clone(),
260                            t => panic!("Operand {t:?} is not allowed with operator re"),
261                        };
262                        let c = match &buf[2] {
263                            Token::Numeric(n) => n.clone(),
264                            t => panic!("Operand {t:?} is not allowed with operator re"),
265                        };
266                        let d = match &buf[3] {
267                            Token::Numeric(n) => n.clone(),
268                            t => panic!("Operand {t:?} is not allowed with operator re"),
269                        };
270                        let e = match &buf[4] {
271                            Token::Numeric(n) => n.clone(),
272                            t => panic!("Operand {t:?} is not allowed with operator re"),
273                        };
274                        let f = match &buf[5] {
275                            Token::Numeric(n) => n.clone(),
276                            t => panic!("Operand {t:?} is not allowed with operator re"),
277                        };
278                        self.process_cm([
279                            a.clone(),
280                            b.clone(),
281                            c.clone(),
282                            d.clone(),
283                            e.clone(),
284                            f.clone(),
285                        ]);
286                        return Some(GraphicsInstruction::Cm(a, b, c, d, e, f));
287                    }
288                    b"w" => {
289                        let line_width = match &buf[0] {
290                            Token::Numeric(n) => n.clone(),
291                            t => panic!("Operand {t:?} is not allowed with operator J"),
292                        };
293                        self.process_w(line_width.clone());
294                        return Some(GraphicsInstruction::LowerW(line_width));
295                    }
296                    b"J" => {
297                        let line_cap = match &buf[0] {
298                            Token::Numeric(n) => n.clone(),
299                            t => panic!("Operand {t:?} is not allowed with operator J"),
300                        };
301                        self.process_upper_j(line_cap.clone());
302                        return Some(GraphicsInstruction::UpperJ(line_cap));
303                    }
304                    b"d" => {
305                        let mut e = buf.iter();
306                        match e.next() {
307                            Some(Token::ArrayBegin) => (),
308                            Some(t) => panic!("First operand {t:?} is not allowed for operator d"),
309                            None => panic!("End of stream too early"),
310                        };
311                        let mut dash_array = DashArray::new();
312                        for t in e.by_ref() {
313                            match t {
314                                Token::Numeric(n) => dash_array.push(n.clone()),
315                                Token::ArrayEnd => break,
316                                t => panic!("Unexpected token {t:?} in dash array"),
317                            }
318                        }
319                        let dash_phase = match e.next() {
320                            Some(Token::Numeric(n)) => n.clone(),
321                            Some(t) => panic!("First operand {t:?} is not allowed for operator d"),
322                            None => panic!("End of stream too early"),
323                        };
324                        self.process_d(dash_array.clone());
325                        return Some(GraphicsInstruction::LowerD(dash_array, dash_phase));
326                    }
327                    b"i" => {
328                        let flatness = match &buf[0] {
329                            Token::Numeric(n) => n.clone(),
330                            t => panic!("Operand {t:?} is not allowed with operator re"),
331                        };
332                        self.process_i(flatness.clone());
333                        return Some(GraphicsInstruction::LowerI(flatness));
334                    }
335                    b"gs" => {
336                        let dict_name = match &buf[0] {
337                            Token::Name(n) => n.clone(),
338                            t => panic!("Operand {t:?} is not allowed with operator gs"),
339                        };
340                        self.process_gs(dict_name);
341                        return Some(GraphicsInstruction::Gs);
342                    }
343                    b"m" => {
344                        let x = match &buf[0] {
345                            Token::Numeric(n) => n.clone(),
346                            t => panic!("Operand {t:?} is not allowed with operator re"),
347                        };
348                        let y = match &buf[1] {
349                            Token::Numeric(n) => n.clone(),
350                            t => panic!("Operand {t:?} is not allowed with operator re"),
351                        };
352                        self.process_m(x.clone(), y.clone());
353                        return Some(GraphicsInstruction::LowerM(x, y));
354                    }
355                    b"l" => {
356                        let x = match &buf[0] {
357                            Token::Numeric(n) => n.clone(),
358                            t => panic!("Operand {t:?} is not allowed with operator re"),
359                        };
360                        let y = match &buf[1] {
361                            Token::Numeric(n) => n.clone(),
362                            t => panic!("Operand {t:?} is not allowed with operator re"),
363                        };
364                        self.process_l(x.clone(), y.clone());
365                        return Some(GraphicsInstruction::LowerL(x, y));
366                    }
367                    b"c" => {
368                        let x1 = match &buf[0] {
369                            Token::Numeric(n) => n.clone(),
370                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
371                        };
372                        let y1 = match &buf[1] {
373                            Token::Numeric(n) => n.clone(),
374                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
375                        };
376                        let x2 = match &buf[2] {
377                            Token::Numeric(n) => n.clone(),
378                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
379                        };
380                        let y2 = match &buf[3] {
381                            Token::Numeric(n) => n.clone(),
382                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
383                        };
384                        let x3 = match &buf[4] {
385                            Token::Numeric(n) => n.clone(),
386                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
387                        };
388                        let y3 = match &buf[5] {
389                            Token::Numeric(n) => n.clone(),
390                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
391                        };
392                        self.process_c(
393                            x1.clone(),
394                            y1.clone(),
395                            x2.clone(),
396                            y2.clone(),
397                            x3.clone(),
398                            y3.clone(),
399                        );
400                        return Some(GraphicsInstruction::LowerC(x1, y1, x2, y2, x3, y3));
401                    }
402                    b"h" => {
403                        return Some(GraphicsInstruction::LowerH);
404                    }
405                    b"re" => {
406                        let x = match &buf[0] {
407                            Token::Numeric(n) => n.clone(),
408                            t => panic!("Operand {t:?} is not allowed with operator re"),
409                        };
410                        let y = match &buf[1] {
411                            Token::Numeric(n) => n.clone(),
412                            t => panic!("Operand {t:?} is not allowed with operator re"),
413                        };
414                        let width = match &buf[2] {
415                            Token::Numeric(n) => n.clone(),
416                            t => panic!("Operand {t:?} is not allowed with operator re"),
417                        };
418                        let height = match &buf[3] {
419                            Token::Numeric(n) => n.clone(),
420                            t => panic!("Operand {t:?} is not allowed with operator re"),
421                        };
422                        self.process_re(x.clone(), y.clone(), width.clone(), height.clone());
423                        return Some(GraphicsInstruction::Re(x, y, width, height));
424                    }
425                    b"W" => return Some(GraphicsInstruction::W),
426                    b"W*" => return Some(GraphicsInstruction::WStar),
427                    b"S" => return Some(GraphicsInstruction::S),
428                    b"f" => return Some(GraphicsInstruction::LowerF),
429                    b"f*" => return Some(GraphicsInstruction::LowerFStar),
430                    b"n" => return Some(GraphicsInstruction::N),
431                    b"cs" => {
432                        let color_space = match &buf[0] {
433                            Token::Name(s) => s.clone(),
434                            t => panic!("Operand {t:?} is not allowed with operator cs"),
435                        };
436                        return Some(GraphicsInstruction::Cs(color_space));
437                    }
438                    b"sc" => {
439                        let colors = match &buf[0] {
440                            Token::Numeric(n) => n.clone(),
441                            t => panic!("Operand {t:?} is not allowed with operator cs"),
442                        };
443                        return Some(GraphicsInstruction::Sc(colors));
444                    }
445                    b"G" => {
446                        let gray = match &buf[0] {
447                            Token::Numeric(n) => n.clone(),
448                            t => panic!("Operand {t:?} is not allowed with operator G"),
449                        };
450                        return Some(GraphicsInstruction::UpperG(gray));
451                    }
452                    b"g" => {
453                        let gray = match &buf[0] {
454                            Token::Numeric(n) => n.clone(),
455                            t => panic!("Operand {t:?} is not allowed with operator G"),
456                        };
457                        return Some(GraphicsInstruction::LowerG(gray));
458                    }
459                    b"RG" => {
460                        let r = match &buf[0] {
461                            Token::Numeric(n) => n.clone(),
462                            t => panic!("Operand {t:?} is not allowed with operator rg"),
463                        };
464                        let g = match &buf[1] {
465                            Token::Numeric(n) => n.clone(),
466                            t => panic!("Operand {t:?} is not allowed with operator rg"),
467                        };
468                        let b = match &buf[2] {
469                            Token::Numeric(n) => n.clone(),
470                            t => panic!("Operand {t:?} is not allowed with operator rg"),
471                        };
472                        return Some(GraphicsInstruction::RG(r, g, b));
473                    }
474                    b"rg" => {
475                        let r = match &buf[0] {
476                            Token::Numeric(n) => n.clone(),
477                            t => panic!("Operand {t:?} is not allowed with operator rg"),
478                        };
479                        let g = match &buf[1] {
480                            Token::Numeric(n) => n.clone(),
481                            t => panic!("Operand {t:?} is not allowed with operator rg"),
482                        };
483                        let b = match &buf[2] {
484                            Token::Numeric(n) => n.clone(),
485                            t => panic!("Operand {t:?} is not allowed with operator rg"),
486                        };
487                        return Some(GraphicsInstruction::Rg(r, g, b));
488                    }
489                    b"BT" => {
490                        self.process_bt();
491                        return Some(GraphicsInstruction::BeginText);
492                    }
493                    b"ET" => return Some(GraphicsInstruction::EndText),
494                    b"TD" => {
495                        let tx = match &buf[0] {
496                            Token::Numeric(n) => n.clone(),
497                            t => panic!("Operand {t:?} is not allowed with operator TD"),
498                        };
499                        let ty = match &buf[1] {
500                            Token::Numeric(n) => n.clone(),
501                            t => panic!("Operand {t:?} is not allowed with operator TD"),
502                        };
503                        self.process_t_upper_d(tx.clone(), ty.clone());
504                        return Some(GraphicsInstruction::TD(tx, ty));
505                    }
506                    b"Td" => {
507                        if buf.len() != 2 {
508                            return self.next();
509                        }
510                        let tx = match &buf[0] {
511                            Token::Numeric(n) => n.clone(),
512                            t => panic!("Operand {t:?} is not allowed with operator TD"),
513                        };
514                        let ty = match &buf[1] {
515                            Token::Numeric(n) => n.clone(),
516                            t => panic!("Operand {t:?} is not allowed with operator TD"),
517                        };
518                        self.process_td(tx.clone(), ty.clone());
519                        return Some(GraphicsInstruction::Td(tx, ty));
520                    }
521                    b"Tc" => {
522                        let char_space = match &buf[0] {
523                            Token::Numeric(n) => n.clone(),
524                            t => panic!("Operand {t:?} is not allowed with operator TD"),
525                        };
526                        self.process_tc(char_space.clone());
527                        return Some(GraphicsInstruction::Tc(char_space));
528                    }
529                    b"Tf" => {
530                        let font = match &buf[0] {
531                            Token::Name(n) => n.clone(),
532                            t => panic!("Operand {t:?} is not allowed with operator TD"),
533                        };
534                        let size = match &buf[1] {
535                            Token::Numeric(n) => n.clone(),
536                            t => panic!("Operand {t:?} is not allowed with operator TD"),
537                        };
538                        self.process_tf(font.clone(), size.clone());
539                        return Some(GraphicsInstruction::Tf(font, size));
540                    }
541                    b"Tr" => {
542                        let render = match &buf[0] {
543                            Token::Numeric(n) => n.clone(),
544                            t => panic!("Operand {t:?} is not allowed with operator Tr"),
545                        };
546                        self.process_tr(render.clone());
547                        return Some(GraphicsInstruction::Tr(render));
548                    }
549                    b"Tm" => {
550                        let a = match &buf[0] {
551                            Token::Numeric(n) => n.clone(),
552                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
553                        };
554                        let b = match &buf[1] {
555                            Token::Numeric(n) => n.clone(),
556                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
557                        };
558                        let c = match &buf[2] {
559                            Token::Numeric(n) => n.clone(),
560                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
561                        };
562                        let d = match &buf[3] {
563                            Token::Numeric(n) => n.clone(),
564                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
565                        };
566                        let e = match &buf[4] {
567                            Token::Numeric(n) => n.clone(),
568                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
569                        };
570                        let f = match &buf[5] {
571                            Token::Numeric(n) => n.clone(),
572                            t => panic!("Operand {t:?} is not allowed with operator Tm"),
573                        };
574                        self.process_tm(
575                            a.clone(),
576                            b.clone(),
577                            c.clone(),
578                            d.clone(),
579                            e.clone(),
580                            f.clone(),
581                        );
582                        return Some(GraphicsInstruction::Tm(a, b, c, d, e, f));
583                    }
584                    b"T*" => {
585                        self.process_t_star();
586                        return Some(GraphicsInstruction::TStar);
587                    }
588                    b"Tj" => {
589                        let text = match &buf[0] {
590                            Token::LitteralString(l) => l,
591                            t => panic!("Operand {t:?} is not allowed with operator Tj"),
592                        };
593                        return Some(GraphicsInstruction::Tj(text.to_vec()));
594                    }
595                    b"TJ" => {
596                        return Some(GraphicsInstruction::TJ(
597                            buf.iter()
598                                .filter(|t| {
599                                    matches!(
600                                        t,
601                                        Token::LitteralString(_)
602                                            | Token::HexString(_)
603                                            | Token::Numeric(_)
604                                    )
605                                })
606                                .map(|t| match t {
607                                    Token::LitteralString(s) => ArrayVal::Text(s.to_vec()),
608                                    Token::HexString(s) => ArrayVal::Text(s.to_vec()),
609                                    Token::Numeric(n) => ArrayVal::Pos(n.clone()),
610                                    t => panic!("Impossible {t:?}"),
611                                })
612                                .collect(),
613                        ))
614                    }
615                    b"Do" => {
616                        return Some(GraphicsInstruction::Do(match &buf[0] {
617                            Token::Name(s) => s.clone(),
618                            t => panic!("Operand {t:?} is not allowed with operator Do"),
619                        }))
620                    }
621                    b"BDC" => return Some(GraphicsInstruction::BDC),
622                    b"BMC" => return Some(GraphicsInstruction::BMC),
623                    b"EMC" => return Some(GraphicsInstruction::EMC),
624                    s => println!(
625                        "Content token operator {:?} is not known, operands {:?}",
626                        String::from_utf8(s.to_vec()),
627                        buf
628                    ),
629                },
630                t => panic!("Pdf token {t:?} has no mapping implemented to ContentStream"),
631            }
632        }
633        None
634    }
635}
636
637// Text state operators (page 397)
638#[derive(Clone)]
639struct TextState {
640    tc: Number,          // char spacing
641    tw: Number,          // word spacing
642    th: Number,          // horizontal scaling
643    tl: Number,          // leading
644    tf: Option<String>,  // text font
645    tfs: Option<Number>, // text font size
646    tmode: Number,       // text rendering mode
647                         // trise: Number,       // text rise
648                         // tk: bool,            // text knockout
649}
650
651impl Default for TextState {
652    fn default() -> Self {
653        Self {
654            tc: Number::Integer(0),
655            tw: Number::Integer(0),
656            th: Number::Real(1.0),
657            tl: Number::Integer(0),
658            tf: None,
659            tfs: None,
660            tmode: Number::Integer(0),
661            // trise: Number::Integer(0),
662            // tk: true,
663        }
664    }
665}
666
667#[derive(Clone)]
668struct GraphicsState {
669    // device-independant state
670    ctm: Matrix, // current transformation matrix
671    // TODO: clipping_path,
672    // color_space: String, // current color space
673    // TODO: color,
674    text_state: TextState,
675    line_width: Number,
676    line_cap: Number,
677    // line_join: Number,
678    // miter_limit: Number,
679    // TODO: dash_pattern,
680    // rendering_intent: String,
681    // stroke_adjustment: bool,
682    // blend_mode: String,
683    // TODO: softmask,
684    // alpha_constant: Number,
685    // alpha_source: bool,
686    // device dependant state
687    // overprint: bool,
688    // overprint_mode: Number,
689    // TODO: black_generation,
690    // TODO: undercolor_removal
691    // TODO: transfer
692    // TODO: halftone
693    flatness: Number,
694    // TODO: smoothness: Number
695}
696
697impl Default for GraphicsState {
698    fn default() -> Self {
699        Self {
700            ctm: Matrix::default(), // identity matrix
701            // color_space: String::from("DeviceGray"),
702            text_state: TextState::default(),
703            line_width: Number::Real(1.0),
704            line_cap: Number::Integer(0), // square butt caps
705            // line_join: Number::Integer(0),
706            // miter_limit: Number::Real(10.0),
707            // rendering_intent: String::from("RelativeColorimetric"),
708            // stroke_adjustment: false,
709            // blend_mode: String::from("Normal"),
710            // alpha_constant: Number::Real(1.0),
711            // alpha_source: false,
712            // overprint: false,
713            // overprint_mode: Number::Integer(0),
714            flatness: Number::Real(1.0),
715        }
716    }
717}
718
719pub struct TextContent<'a> {
720    resources: Box<Resources>,
721    content: Content<'a>,
722}
723
724impl<'a> TextContent<'a> {
725    pub fn new(content_bytes: &'a [u8], resources: Box<Resources>) -> Self {
726        Self {
727            resources,
728            content: Content::from(Tokenizer::new(content_bytes, 0)),
729        }
730    }
731
732    pub fn get_text(&mut self, display_char: bool) -> String {
733        let mut output = String::new();
734        let mut tm_prev = self.content.text_object.tm;
735        while let Some(i) = self.content.next() {
736            match i {
737                GraphicsInstruction::Tj(text) => {
738                    let font = match self.content.graphic_state.text_state.tf {
739                        Some(ref s) => match &self.resources.font {
740                            Some(fontmap) => fontmap.0.get(s).unwrap(),
741                            None => panic!("Fontmap does not contains the font name {s:?}"),
742                        },
743                        None => panic!("Text state should have a font set"),
744                    };
745
746                    // detect a line feed if tm y coordinate has changed
747                    if self.content.text_object.tm.get_ty() != tm_prev.get_ty() {
748                        output += "\n";
749                    }
750                    tm_prev = self.content.text_object.tm;
751
752                    for c in text {
753                        if display_char {
754                            output += format!(
755                                "{:?}, {:?}, {:?}, {:}\n",
756                                c as char,
757                                font.subtype,
758                                font.base_font,
759                                self.content.text_object.tm
760                            )
761                            .as_str();
762                        } else {
763                            output.push(c as char);
764                        }
765                    }
766                }
767                GraphicsInstruction::TJ(text) => {
768                    // current font
769                    let font = match self.content.graphic_state.text_state.tf {
770                        Some(ref s) => match &self.resources.font {
771                            Some(fontmap) => fontmap.0.get(s).unwrap(),
772                            None => panic!("Fontmap does not contains the font name {s:?}"),
773                        },
774                        None => panic!("Text state should have a font set"),
775                    };
776
777                    // detect a line feed if tm y coordinate has changed
778                    if self.content.text_object.tm.get_ty() != tm_prev.get_ty() {
779                        output += "\n";
780                    }
781                    tm_prev = self.content.text_object.tm;
782
783                    // estimate current space width (before scaling)
784                    // let w_space = font.estimate_space_width();
785                    for c in text {
786                        match c {
787                            ArrayVal::Text(t) => {
788                                // string characters in to unicode map
789                                match &font.to_unicode {
790                                    Some(to_unicode_cmap) => {
791                                        let mut hex_iter = t.iter();
792                                        while let Some(c) = hex_iter.next() {
793                                            let char_idx = match to_unicode_cmap.is_two_bytes {
794                                                true => {
795                                                    *c as usize * 256
796                                                        + *hex_iter.next().unwrap() as usize
797                                                }
798                                                false => usize::from(*c),
799                                            };
800                                            let char = match to_unicode_cmap.cmap.get(&char_idx) {
801                                                Some(c) => c,
802                                                None => panic!("CMap does not contain a char with idx {:?}, charmap {:?}", char_idx, to_unicode_cmap.cmap)
803                                            };
804                                            // paint glyph
805                                            if display_char {
806                                                output += format!(
807                                                    "{:?}, {:?}, {:?}, {:}\n",
808                                                    char,
809                                                    font.subtype,
810                                                    font.base_font,
811                                                    self.content.text_object.tm
812                                                )
813                                                .as_str();
814                                            } else {
815                                                output.push(*char);
816                                            }
817                                            // displacement vector
818                                            let w0: Number = match font.clone().get_width(*c) {
819                                                Ok(n) => n,
820                                                Err(_) => Number::Real(0.0), // assumption at the moment, probably need to leverage Font Encoding
821                                            };
822                                            // let w1 = Number::Integer(0); // temporary, need to be updated with writing mode (horizontal writing only)
823                                            let tfs = match &self.content.graphic_state.text_state.tfs {
824                                                Some(n) => n,
825                                                None => panic!("Font size should be set before painting a glyph")
826                                            };
827                                            let tc =
828                                                self.content.graphic_state.text_state.tc.clone();
829                                            let tw =
830                                                self.content.graphic_state.text_state.tw.clone();
831                                            let th =
832                                                self.content.graphic_state.text_state.th.clone();
833                                            // update text matrix (page 410)
834                                            // translation vector coordinates
835                                            // tj displacement factor is added according to the text writing mode (assumed 0 for now) -> page 408
836                                            let mut tx = w0.clone() * tfs.clone() + tc.clone();
837                                            // tw displacement for word space
838                                            if *c == b' ' {
839                                                tx = tx + tw.clone();
840                                            }
841                                            tx = tx * th;
842                                            let ty = Number::Real(0.0);
843                                            // let ty = (w1.clone()) // + -tj.clone() / Number::Real(1000.0)) -> to be added if a different writing mode is selected
844                                            //     * tfs.clone()
845                                            //     + tc.clone()
846                                            //     + tw.clone();
847                                            self.content.text_object.tm =
848                                                Matrix::new(
849                                                    1.0,
850                                                    0.0,
851                                                    0.0,
852                                                    1.0,
853                                                    tx.into(),
854                                                    ty.into(),
855                                                ) * self.content.text_object.tm;
856                                        }
857                                    }
858                                    // no unicode mapping -> read as char
859                                    None => {
860                                        for c in t {
861                                            if display_char {
862                                                output += format!(
863                                                    "{:?}, {:?}, {:?}, {:}\n",
864                                                    c as char,
865                                                    font.subtype,
866                                                    font.base_font,
867                                                    self.content.text_object.tm
868                                                )
869                                                .as_str();
870                                            } else {
871                                                output.push(c as char);
872                                            }
873                                            // displacement vector
874                                            let w0: Number = match font.clone().get_width(c) {
875                                                Ok(w) => w,
876                                                Err(_) => Number::Real(0.0),
877                                            };
878                                            // let w1 = Number::Integer(0); // temporary, need to be updated with writing mode (horizontal writing only)
879                                            let tfs = match &self.content.graphic_state.text_state.tfs {
880                                                Some(n) => n,
881                                                None => panic!("Font size should be set before painting a glyph")
882                                            };
883                                            let tc =
884                                                self.content.graphic_state.text_state.tc.clone();
885                                            let tw =
886                                                self.content.graphic_state.text_state.tw.clone();
887                                            let th =
888                                                self.content.graphic_state.text_state.th.clone();
889                                            // update text matrix (page 410)
890                                            // translation vector coordinates is (tx, ty)
891                                            let mut tx = w0.clone() * tfs.clone() + tc.clone();
892                                            // tw displacement for word space
893                                            if c == b' ' {
894                                                tx = tx + tw.clone();
895                                            }
896                                            tx = tx * th;
897                                            let ty = Number::Real(0.0);
898                                            // let ty = (w1.clone()) // + -tj.clone() / Number::Real(1000.0))
899                                            //     * tfs.clone()
900                                            //     + tc.clone()
901                                            //     + tw.clone();
902                                            self.content.text_object.tm =
903                                                Matrix::new(
904                                                    1.0,
905                                                    0.0,
906                                                    0.0,
907                                                    1.0,
908                                                    tx.into(),
909                                                    ty.into(),
910                                                ) * self.content.text_object.tm;
911                                        }
912                                    }
913                                };
914                            }
915                            // translation according to text writing direction (assumed horizontal for now)
916                            ArrayVal::Pos(tj) => {
917                                let tfs = match &self.content.graphic_state.text_state.tfs {
918                                    Some(n) => n,
919                                    None => {
920                                        panic!("Font size should be set before painting a glyph")
921                                    }
922                                };
923                                let th = self.content.graphic_state.text_state.th.clone();
924                                let tx = -tj / Number::Real(1000.0) * tfs.clone() * th.clone();
925                                // apply transformation
926                                self.content.text_object.tm =
927                                    Matrix::new(1.0, 0.0, 0.0, 1.0, tx.clone().into(), 0.0)
928                                        * self.content.text_object.tm;
929                                // whitespace detected
930                                // to be improved...
931                                // match tx {
932                                //     Number::Integer(i) => {
933                                //         if i > 0 && !output.ends_with(' ') {
934                                //             output.push(' ');
935                                //         }
936                                //     }
937                                //     Number::Real(f) => {
938                                //         if f > 0.0 && !output.ends_with(' ') {
939                                //             output.push(' ');
940                                //         }
941                                //     }
942                                // }
943                            }
944                        }
945                    }
946                }
947                _ => (),
948            }
949        }
950        output.trim_start_matches(['\n', ' ']).to_string()
951    }
952}
953
#[cfg(test)]
mod tests {

    use std::vec;

    use super::*;

    /// Checks that `stream` yields exactly the `expected` instructions, in order.
    fn assert_yields(stream: &mut Content<'_>, expected: Vec<GraphicsInstruction>) {
        for instruction in expected {
            assert_eq!(stream.next(), Some(instruction));
        }
    }

    /// String element of a `TJ` array.
    fn text(bytes: &[u8]) -> ArrayVal {
        ArrayVal::Text(bytes.to_vec())
    }

    #[test]
    fn test_tokens() {
        let mut stream =
            Content::from(b"BT\n70 50 TD\n/F1 12 Tf\n(Hello, world!) Tj\nET".as_slice());
        assert_yields(
            &mut stream,
            vec![
                GraphicsInstruction::BeginText,
                GraphicsInstruction::TD(Number::Integer(70), Number::Integer(50)),
                GraphicsInstruction::Tf("F1".to_string(), Number::Integer(12)),
                GraphicsInstruction::Tj(b"Hello, world!".to_vec()),
                GraphicsInstruction::EndText,
            ],
        );
        // The stream is exhausted after `ET`.
        assert_eq!(stream.next(), None);
    }

    #[test]
    fn test_stream_hexstrings() {
        // Integer positioning element of a `TJ` array.
        let pos = |offset| ArrayVal::Pos(Number::Integer(offset));
        let mut stream = Content::from(
            b"[<18>14<0D>2<06>7<14>1<04>-4<03>21<02>1<06>-2<04>-4<02>1<0906>]TJ".as_slice(),
        );
        assert_yields(
            &mut stream,
            vec![GraphicsInstruction::TJ(vec![
                text(&[24]),
                pos(14),
                text(&[13]),
                pos(2),
                text(&[6]),
                pos(7),
                text(&[20]),
                pos(1),
                text(&[4]),
                pos(-4),
                text(&[3]),
                pos(21),
                text(&[2]),
                pos(1),
                text(&[6]),
                pos(-2),
                text(&[4]),
                pos(-4),
                text(&[2]),
                pos(1),
                text(&[9, 6]),
            ])],
        );
    }

    #[test]
    fn test_tokenizer_dict() {
        let mut text_stream =
            Content::from(b" /P <</MCID 0>> BDC q\n0.00000887 0 595.25 842 re".as_slice());
        assert_yields(
            &mut text_stream,
            vec![
                GraphicsInstruction::BDC,
                GraphicsInstruction::LowerQ,
                GraphicsInstruction::Re(
                    Number::Real(0.00000887),
                    Number::Integer(0),
                    Number::Real(595.25),
                    Number::Integer(842),
                ),
            ],
        );
    }

    #[test]
    fn test_tokenizer_complex() {
        // Integer positioning element of a `TJ` array.
        let pos = |offset| ArrayVal::Pos(Number::Integer(offset));
        let mut text_stream = Content::from(b"BT\n/F33 8.9664 Tf 54 713.7733 Td[(v0)-525(:=)-525(ld)-525(state[748])-2625(//)-525(load)-525(primes)-525(from)-525(the)-525(trace)-525(activation)-525(record)]TJ".as_slice());
        assert_yields(
            &mut text_stream,
            vec![
                GraphicsInstruction::BeginText,
                GraphicsInstruction::Tf("F33".to_string(), Number::Real(8.9664)),
                GraphicsInstruction::Td(Number::Integer(54), Number::Real(713.7733)),
                GraphicsInstruction::TJ(vec![
                    text(b"v0"),
                    pos(-525),
                    text(b":="),
                    pos(-525),
                    text(b"ld"),
                    pos(-525),
                    text(b"state[748]"),
                    pos(-2625),
                    text(b"//"),
                    pos(-525),
                    text(b"load"),
                    pos(-525),
                    text(b"primes"),
                    pos(-525),
                    text(b"from"),
                    pos(-525),
                    text(b"the"),
                    pos(-525),
                    text(b"trace"),
                    pos(-525),
                    text(b"activation"),
                    pos(-525),
                    text(b"record"),
                ]),
            ],
        );
        // Nothing follows the `TJ` operator.
        assert_eq!(text_stream.next(), None);
    }
}