1use core::iter::Iterator;
2
3use crate::{
4 algebra::{Matrix, Number},
5 body::Resources,
6 object::Name,
7 tokenizer::{Token, Tokenizer},
8};
9
/// Matrices tracked while interpreting text-positioning operators.
///
/// The whole object is reset to its default by `Content::process_bt`.
#[derive(Default)]
struct TextObject {
    /// Text matrix; overwritten by `Td`/`TD`/`Tm`/`T*` and advanced by
    /// `TextContent::get_text` as glyphs are shown.
    tm: Matrix,
    /// Text line matrix; translated by `Td`/`TD`/`T*` and replaced by `Tm`.
    tlm: Matrix,
}
15
/// Interpreter over a tokenized content stream.
///
/// Iterating over a `Content` yields one `GraphicsInstruction` per recognised
/// operator and updates the state fields below as a side effect.
struct Content<'a> {
    /// Graphics state currently in effect.
    graphic_state: GraphicsState,
    /// States saved by `q` and restored by `Q`.
    graphic_state_stack: Vec<GraphicsState>,
    /// Text matrices of the current text object.
    text_object: TextObject,
    /// Source of the tokens being interpreted.
    tokenizer: Tokenizer<'a>,
}
22
/// One element of the array operand of the `TJ` operator.
#[derive(Debug, PartialEq)]
enum ArrayVal {
    /// Raw string bytes (taken from a literal or a hex string token).
    Text(Vec<u8>),
    /// Numeric position adjustment; `get_text` divides it by 1000 and scales
    /// it by the font size and horizontal scaling.
    Pos(Number),
}
28
// Aliases that only give readable names to the operands carried by
// `GraphicsInstruction` variants; every one of them is a plain `Number`
// (or a vector of them).

/// Numbers collected between `[` and `]` for the `d` operator.
type DashArray = Vec<Number>;
/// Number following the dash array of the `d` operator.
type DashPhase = Number;
/// Operand of the `w` operator.
type LineWidth = Number;
/// Operand of the `J` operator.
type LineStyle = Number;
/// Coordinate operands of the `m` and `l` operators.
type X = Number;
type Y = Number;
/// Coordinate operands of the `c` operator, in the order they are read.
type X1 = Number;
type Y1 = Number;
type X2 = Number;
type Y2 = Number;
type X3 = Number;
type Y3 = Number;
/// Operand of the `G` and `g` operators.
type Gray = Number;
/// Operands of the `rg` operator.
type R = Number;
type G = Number;
type B = Number;
45
/// A content-stream operator together with the operands that were parsed for it.
///
/// Each variant is produced by `Content::next` when it meets the operator
/// named in the variant's comment.
#[derive(Debug, PartialEq)]
enum GraphicsInstruction {
    /// `q`
    LowerQ,
    /// `Q`
    UpperQ,
    /// `BDC` (operands are not kept)
    BDC,
    /// `BMC` (operands are not kept)
    BMC,
    /// `EMC`
    EMC,
    /// `cm` with its six numeric operands
    Cm(Number, Number, Number, Number, Number, Number),
    /// `w`
    LowerW(LineWidth),
    /// `J`
    UpperJ(LineStyle),
    /// `d`
    LowerD(DashArray, DashPhase),
    /// `i`
    LowerI(Number),
    /// `gs` (the dictionary name operand is not kept)
    Gs,
    /// `m`
    LowerM(X, Y),
    /// `l`
    LowerL(X, Y),
    /// `c`
    LowerC(X1, Y1, X2, Y2, X3, Y3),
    /// `h`
    LowerH,
    /// `re` with x, y, width and height
    Re(Number, Number, Number, Number),
    /// `W`
    W,
    /// `W*`
    WStar,
    /// `S`
    S,
    /// `f`
    LowerF,
    /// `f*`
    LowerFStar,
    /// `n`
    N,
    /// `cs` with the colour space name
    Cs(String),
    /// `sc` (only the first numeric operand is kept)
    Sc(Number),
    /// `G`
    UpperG(Gray),
    /// `g`
    LowerG(Gray),
    /// `RG`
    RG(Number, Number, Number),
    /// `rg`
    Rg(R, G, B),
    /// `Td`
    Td(Number, Number),
    /// `TD`
    TD(Number, Number),
    /// `Tm` with its six numeric operands
    Tm(Number, Number, Number, Number, Number, Number),
    /// `T*`
    TStar,
    /// `Tc`
    Tc(Number),
    /// `Tf` with the font name and the font size
    Tf(String, Number),
    /// `Tr`
    Tr(Number),
    /// `Tj` with the raw string bytes
    Tj(Vec<u8>),
    /// `TJ` with its strings and position adjustments, in stream order
    TJ(Vec<ArrayVal>),
    /// `BT`
    BeginText,
    /// `ET`
    EndText,
    /// `Do` with the object name
    Do(String),
}
99
100impl<'a> From<Tokenizer<'a>> for Content<'a> {
101 fn from(tokenizer: Tokenizer<'a>) -> Self {
102 Content {
103 graphic_state: GraphicsState::default(),
104 graphic_state_stack: vec![],
105 text_object: TextObject::default(),
106 tokenizer,
107 }
108 }
109}
110
111impl<'a> From<&'a [u8]> for Content<'a> {
112 fn from(bytes: &'a [u8]) -> Self {
113 Content {
114 graphic_state: GraphicsState::default(),
115 graphic_state_stack: vec![],
116 text_object: TextObject::default(),
117 tokenizer: Tokenizer::new(bytes, 0),
118 }
119 }
120}
121
122impl Content<'_> {
123 fn process_q(&mut self) {
124 self.graphic_state_stack.push(self.graphic_state.clone())
125 }
126
127 fn process_upper_q(&mut self) {
128 self.graphic_state = self
129 .graphic_state_stack
130 .pop()
131 .expect("Unable to restore graphic state from empty stack");
132 }
133
134 fn process_cm(&mut self, cm: [Number; 6]) {
135 self.graphic_state.ctm = Matrix::from(cm);
136 }
137
138 fn process_w(&mut self, line_width: Number) {
139 self.graphic_state.line_width = line_width;
140 }
141
142 fn process_upper_j(&mut self, line_cap: Number) {
143 self.graphic_state.line_cap = line_cap;
144 }
145
146 fn process_d(&mut self, _dash_array: DashArray) {}
147
148 fn process_i(&mut self, flatness: Number) {
149 self.graphic_state.flatness = flatness;
150 }
151
152 fn process_gs(&mut self, _dict_name: Name) {}
153
154 fn process_m(&mut self, _x: Number, _y: Number) {}
155
156 fn process_l(&mut self, _x: Number, _y: Number) {}
157
158 fn process_c(
159 &mut self,
160 _x1: Number,
161 _y1: Number,
162 _x2: Number,
163 _y2: Number,
164 _x3: Number,
165 _y3: Number,
166 ) {
167 }
168
169 fn process_re(&mut self, _x: Number, _y: Number, _width: Number, _height: Number) {}
170
171 fn process_bt(&mut self) {
172 self.text_object = TextObject::default();
173 }
174
175 fn process_tc(&mut self, tc: Number) {
176 self.graphic_state.text_state.tc = tc;
177 }
178
179 fn process_td(&mut self, tx: Number, ty: Number) {
180 self.text_object.tlm =
181 Matrix::new(1.0, 0.0, 0.0, 1.0, f32::from(tx), f32::from(ty)) * self.text_object.tlm;
182 self.text_object.tm = self.text_object.tlm;
183 }
184
185 fn process_t_upper_d(&mut self, tx: Number, ty: Number) {
186 self.graphic_state.text_state.tl = -ty.clone();
187 self.process_td(tx, ty);
188 }
189
190 fn process_tr(&mut self, render: Number) {
191 self.graphic_state.text_state.tmode = render;
192 }
193
194 fn process_tf(&mut self, font: String, size: Number) {
195 self.graphic_state.text_state.tf = Some(font);
196 self.graphic_state.text_state.tfs = Some(size);
197 }
198
199 fn process_tm(&mut self, a: Number, b: Number, c: Number, d: Number, e: Number, f: Number) {
200 self.text_object.tm = Matrix::new(
201 f32::from(a.clone()),
202 f32::from(b.clone()),
203 f32::from(c.clone()),
204 f32::from(d.clone()),
205 f32::from(e.clone()),
206 f32::from(f.clone()),
207 );
208 self.text_object.tlm = Matrix::new(
209 f32::from(a),
210 f32::from(b),
211 f32::from(c),
212 f32::from(d),
213 f32::from(e),
214 f32::from(f),
215 );
216 }
217
218 fn process_t_star(&mut self) {
219 self.process_td(Number::Integer(0), self.graphic_state.text_state.tl.clone());
220 }
221}
222
223impl Iterator for Content<'_> {
224 type Item = GraphicsInstruction;
225
226 fn next(&mut self) -> Option<Self::Item> {
227 let mut buf: Vec<Token> = vec![];
228 while let Some(t) = self.tokenizer.next() {
229 match t {
230 Token::LitteralString(_) => buf.push(t),
231 Token::Name(_) => buf.push(t),
232 Token::ArrayBegin => buf.push(t),
233 Token::ArrayEnd => buf.push(t),
234 Token::DictBegin => {
235 for t in self.tokenizer.by_ref() {
237 if t == Token::DictEnd {
238 break;
239 }
240 }
241 }
242 Token::HexString(_) => buf.push(t),
243 Token::Numeric(_) => buf.push(t),
244 Token::String(l) => match l.as_slice() {
245 b"q" => {
246 self.process_q();
247 return Some(GraphicsInstruction::LowerQ);
248 }
249 b"Q" => {
250 self.process_upper_q();
251 return Some(GraphicsInstruction::UpperQ);
252 }
253 b"cm" => {
254 let a = match &buf[0] {
255 Token::Numeric(n) => n.clone(),
256 t => panic!("Operand {t:?} is not allowed with operator re"),
257 };
258 let b = match &buf[1] {
259 Token::Numeric(n) => n.clone(),
260 t => panic!("Operand {t:?} is not allowed with operator re"),
261 };
262 let c = match &buf[2] {
263 Token::Numeric(n) => n.clone(),
264 t => panic!("Operand {t:?} is not allowed with operator re"),
265 };
266 let d = match &buf[3] {
267 Token::Numeric(n) => n.clone(),
268 t => panic!("Operand {t:?} is not allowed with operator re"),
269 };
270 let e = match &buf[4] {
271 Token::Numeric(n) => n.clone(),
272 t => panic!("Operand {t:?} is not allowed with operator re"),
273 };
274 let f = match &buf[5] {
275 Token::Numeric(n) => n.clone(),
276 t => panic!("Operand {t:?} is not allowed with operator re"),
277 };
278 self.process_cm([
279 a.clone(),
280 b.clone(),
281 c.clone(),
282 d.clone(),
283 e.clone(),
284 f.clone(),
285 ]);
286 return Some(GraphicsInstruction::Cm(a, b, c, d, e, f));
287 }
288 b"w" => {
289 let line_width = match &buf[0] {
290 Token::Numeric(n) => n.clone(),
291 t => panic!("Operand {t:?} is not allowed with operator J"),
292 };
293 self.process_w(line_width.clone());
294 return Some(GraphicsInstruction::LowerW(line_width));
295 }
296 b"J" => {
297 let line_cap = match &buf[0] {
298 Token::Numeric(n) => n.clone(),
299 t => panic!("Operand {t:?} is not allowed with operator J"),
300 };
301 self.process_upper_j(line_cap.clone());
302 return Some(GraphicsInstruction::UpperJ(line_cap));
303 }
304 b"d" => {
305 let mut e = buf.iter();
306 match e.next() {
307 Some(Token::ArrayBegin) => (),
308 Some(t) => panic!("First operand {t:?} is not allowed for operator d"),
309 None => panic!("End of stream too early"),
310 };
311 let mut dash_array = DashArray::new();
312 for t in e.by_ref() {
313 match t {
314 Token::Numeric(n) => dash_array.push(n.clone()),
315 Token::ArrayEnd => break,
316 t => panic!("Unexpected token {t:?} in dash array"),
317 }
318 }
319 let dash_phase = match e.next() {
320 Some(Token::Numeric(n)) => n.clone(),
321 Some(t) => panic!("First operand {t:?} is not allowed for operator d"),
322 None => panic!("End of stream too early"),
323 };
324 self.process_d(dash_array.clone());
325 return Some(GraphicsInstruction::LowerD(dash_array, dash_phase));
326 }
327 b"i" => {
328 let flatness = match &buf[0] {
329 Token::Numeric(n) => n.clone(),
330 t => panic!("Operand {t:?} is not allowed with operator re"),
331 };
332 self.process_i(flatness.clone());
333 return Some(GraphicsInstruction::LowerI(flatness));
334 }
335 b"gs" => {
336 let dict_name = match &buf[0] {
337 Token::Name(n) => n.clone(),
338 t => panic!("Operand {t:?} is not allowed with operator gs"),
339 };
340 self.process_gs(dict_name);
341 return Some(GraphicsInstruction::Gs);
342 }
343 b"m" => {
344 let x = match &buf[0] {
345 Token::Numeric(n) => n.clone(),
346 t => panic!("Operand {t:?} is not allowed with operator re"),
347 };
348 let y = match &buf[1] {
349 Token::Numeric(n) => n.clone(),
350 t => panic!("Operand {t:?} is not allowed with operator re"),
351 };
352 self.process_m(x.clone(), y.clone());
353 return Some(GraphicsInstruction::LowerM(x, y));
354 }
355 b"l" => {
356 let x = match &buf[0] {
357 Token::Numeric(n) => n.clone(),
358 t => panic!("Operand {t:?} is not allowed with operator re"),
359 };
360 let y = match &buf[1] {
361 Token::Numeric(n) => n.clone(),
362 t => panic!("Operand {t:?} is not allowed with operator re"),
363 };
364 self.process_l(x.clone(), y.clone());
365 return Some(GraphicsInstruction::LowerL(x, y));
366 }
367 b"c" => {
368 let x1 = match &buf[0] {
369 Token::Numeric(n) => n.clone(),
370 t => panic!("Operand {t:?} is not allowed with operator Tm"),
371 };
372 let y1 = match &buf[1] {
373 Token::Numeric(n) => n.clone(),
374 t => panic!("Operand {t:?} is not allowed with operator Tm"),
375 };
376 let x2 = match &buf[2] {
377 Token::Numeric(n) => n.clone(),
378 t => panic!("Operand {t:?} is not allowed with operator Tm"),
379 };
380 let y2 = match &buf[3] {
381 Token::Numeric(n) => n.clone(),
382 t => panic!("Operand {t:?} is not allowed with operator Tm"),
383 };
384 let x3 = match &buf[4] {
385 Token::Numeric(n) => n.clone(),
386 t => panic!("Operand {t:?} is not allowed with operator Tm"),
387 };
388 let y3 = match &buf[5] {
389 Token::Numeric(n) => n.clone(),
390 t => panic!("Operand {t:?} is not allowed with operator Tm"),
391 };
392 self.process_c(
393 x1.clone(),
394 y1.clone(),
395 x2.clone(),
396 y2.clone(),
397 x3.clone(),
398 y3.clone(),
399 );
400 return Some(GraphicsInstruction::LowerC(x1, y1, x2, y2, x3, y3));
401 }
402 b"h" => {
403 return Some(GraphicsInstruction::LowerH);
404 }
405 b"re" => {
406 let x = match &buf[0] {
407 Token::Numeric(n) => n.clone(),
408 t => panic!("Operand {t:?} is not allowed with operator re"),
409 };
410 let y = match &buf[1] {
411 Token::Numeric(n) => n.clone(),
412 t => panic!("Operand {t:?} is not allowed with operator re"),
413 };
414 let width = match &buf[2] {
415 Token::Numeric(n) => n.clone(),
416 t => panic!("Operand {t:?} is not allowed with operator re"),
417 };
418 let height = match &buf[3] {
419 Token::Numeric(n) => n.clone(),
420 t => panic!("Operand {t:?} is not allowed with operator re"),
421 };
422 self.process_re(x.clone(), y.clone(), width.clone(), height.clone());
423 return Some(GraphicsInstruction::Re(x, y, width, height));
424 }
425 b"W" => return Some(GraphicsInstruction::W),
426 b"W*" => return Some(GraphicsInstruction::WStar),
427 b"S" => return Some(GraphicsInstruction::S),
428 b"f" => return Some(GraphicsInstruction::LowerF),
429 b"f*" => return Some(GraphicsInstruction::LowerFStar),
430 b"n" => return Some(GraphicsInstruction::N),
431 b"cs" => {
432 let color_space = match &buf[0] {
433 Token::Name(s) => s.clone(),
434 t => panic!("Operand {t:?} is not allowed with operator cs"),
435 };
436 return Some(GraphicsInstruction::Cs(color_space));
437 }
438 b"sc" => {
439 let colors = match &buf[0] {
440 Token::Numeric(n) => n.clone(),
441 t => panic!("Operand {t:?} is not allowed with operator cs"),
442 };
443 return Some(GraphicsInstruction::Sc(colors));
444 }
445 b"G" => {
446 let gray = match &buf[0] {
447 Token::Numeric(n) => n.clone(),
448 t => panic!("Operand {t:?} is not allowed with operator G"),
449 };
450 return Some(GraphicsInstruction::UpperG(gray));
451 }
452 b"g" => {
453 let gray = match &buf[0] {
454 Token::Numeric(n) => n.clone(),
455 t => panic!("Operand {t:?} is not allowed with operator G"),
456 };
457 return Some(GraphicsInstruction::LowerG(gray));
458 }
459 b"RG" => {
460 let r = match &buf[0] {
461 Token::Numeric(n) => n.clone(),
462 t => panic!("Operand {t:?} is not allowed with operator rg"),
463 };
464 let g = match &buf[1] {
465 Token::Numeric(n) => n.clone(),
466 t => panic!("Operand {t:?} is not allowed with operator rg"),
467 };
468 let b = match &buf[2] {
469 Token::Numeric(n) => n.clone(),
470 t => panic!("Operand {t:?} is not allowed with operator rg"),
471 };
472 return Some(GraphicsInstruction::RG(r, g, b));
473 }
474 b"rg" => {
475 let r = match &buf[0] {
476 Token::Numeric(n) => n.clone(),
477 t => panic!("Operand {t:?} is not allowed with operator rg"),
478 };
479 let g = match &buf[1] {
480 Token::Numeric(n) => n.clone(),
481 t => panic!("Operand {t:?} is not allowed with operator rg"),
482 };
483 let b = match &buf[2] {
484 Token::Numeric(n) => n.clone(),
485 t => panic!("Operand {t:?} is not allowed with operator rg"),
486 };
487 return Some(GraphicsInstruction::Rg(r, g, b));
488 }
489 b"BT" => {
490 self.process_bt();
491 return Some(GraphicsInstruction::BeginText);
492 }
493 b"ET" => return Some(GraphicsInstruction::EndText),
494 b"TD" => {
495 let tx = match &buf[0] {
496 Token::Numeric(n) => n.clone(),
497 t => panic!("Operand {t:?} is not allowed with operator TD"),
498 };
499 let ty = match &buf[1] {
500 Token::Numeric(n) => n.clone(),
501 t => panic!("Operand {t:?} is not allowed with operator TD"),
502 };
503 self.process_t_upper_d(tx.clone(), ty.clone());
504 return Some(GraphicsInstruction::TD(tx, ty));
505 }
506 b"Td" => {
507 if buf.len() != 2 {
508 return self.next();
509 }
510 let tx = match &buf[0] {
511 Token::Numeric(n) => n.clone(),
512 t => panic!("Operand {t:?} is not allowed with operator TD"),
513 };
514 let ty = match &buf[1] {
515 Token::Numeric(n) => n.clone(),
516 t => panic!("Operand {t:?} is not allowed with operator TD"),
517 };
518 self.process_td(tx.clone(), ty.clone());
519 return Some(GraphicsInstruction::Td(tx, ty));
520 }
521 b"Tc" => {
522 let char_space = match &buf[0] {
523 Token::Numeric(n) => n.clone(),
524 t => panic!("Operand {t:?} is not allowed with operator TD"),
525 };
526 self.process_tc(char_space.clone());
527 return Some(GraphicsInstruction::Tc(char_space));
528 }
529 b"Tf" => {
530 let font = match &buf[0] {
531 Token::Name(n) => n.clone(),
532 t => panic!("Operand {t:?} is not allowed with operator TD"),
533 };
534 let size = match &buf[1] {
535 Token::Numeric(n) => n.clone(),
536 t => panic!("Operand {t:?} is not allowed with operator TD"),
537 };
538 self.process_tf(font.clone(), size.clone());
539 return Some(GraphicsInstruction::Tf(font, size));
540 }
541 b"Tr" => {
542 let render = match &buf[0] {
543 Token::Numeric(n) => n.clone(),
544 t => panic!("Operand {t:?} is not allowed with operator Tr"),
545 };
546 self.process_tr(render.clone());
547 return Some(GraphicsInstruction::Tr(render));
548 }
549 b"Tm" => {
550 let a = match &buf[0] {
551 Token::Numeric(n) => n.clone(),
552 t => panic!("Operand {t:?} is not allowed with operator Tm"),
553 };
554 let b = match &buf[1] {
555 Token::Numeric(n) => n.clone(),
556 t => panic!("Operand {t:?} is not allowed with operator Tm"),
557 };
558 let c = match &buf[2] {
559 Token::Numeric(n) => n.clone(),
560 t => panic!("Operand {t:?} is not allowed with operator Tm"),
561 };
562 let d = match &buf[3] {
563 Token::Numeric(n) => n.clone(),
564 t => panic!("Operand {t:?} is not allowed with operator Tm"),
565 };
566 let e = match &buf[4] {
567 Token::Numeric(n) => n.clone(),
568 t => panic!("Operand {t:?} is not allowed with operator Tm"),
569 };
570 let f = match &buf[5] {
571 Token::Numeric(n) => n.clone(),
572 t => panic!("Operand {t:?} is not allowed with operator Tm"),
573 };
574 self.process_tm(
575 a.clone(),
576 b.clone(),
577 c.clone(),
578 d.clone(),
579 e.clone(),
580 f.clone(),
581 );
582 return Some(GraphicsInstruction::Tm(a, b, c, d, e, f));
583 }
584 b"T*" => {
585 self.process_t_star();
586 return Some(GraphicsInstruction::TStar);
587 }
588 b"Tj" => {
589 let text = match &buf[0] {
590 Token::LitteralString(l) => l,
591 t => panic!("Operand {t:?} is not allowed with operator Tj"),
592 };
593 return Some(GraphicsInstruction::Tj(text.to_vec()));
594 }
595 b"TJ" => {
596 return Some(GraphicsInstruction::TJ(
597 buf.iter()
598 .filter(|t| {
599 matches!(
600 t,
601 Token::LitteralString(_)
602 | Token::HexString(_)
603 | Token::Numeric(_)
604 )
605 })
606 .map(|t| match t {
607 Token::LitteralString(s) => ArrayVal::Text(s.to_vec()),
608 Token::HexString(s) => ArrayVal::Text(s.to_vec()),
609 Token::Numeric(n) => ArrayVal::Pos(n.clone()),
610 t => panic!("Impossible {t:?}"),
611 })
612 .collect(),
613 ))
614 }
615 b"Do" => {
616 return Some(GraphicsInstruction::Do(match &buf[0] {
617 Token::Name(s) => s.clone(),
618 t => panic!("Operand {t:?} is not allowed with operator Do"),
619 }))
620 }
621 b"BDC" => return Some(GraphicsInstruction::BDC),
622 b"BMC" => return Some(GraphicsInstruction::BMC),
623 b"EMC" => return Some(GraphicsInstruction::EMC),
624 s => println!(
625 "Content token operator {:?} is not known, operands {:?}",
626 String::from_utf8(s.to_vec()),
627 buf
628 ),
629 },
630 t => panic!("Pdf token {t:?} has no mapping implemented to ContentStream"),
631 }
632 }
633 None
634 }
635}
636
/// Text-related part of the graphics state.
///
/// Field names follow the short parameter names used by the operators that
/// set them.
#[derive(Clone)]
struct TextState {
    /// Character spacing, set by `Tc`; added to every glyph advance.
    tc: Number,
    /// Word spacing; added to the advance of the space byte (`b' '`).
    tw: Number,
    /// Horizontal scaling factor applied to every advance (defaults to 1.0).
    th: Number,
    /// Leading; set to `-ty` by `TD` and used by `T*`.
    tl: Number,
    /// Font resource name selected by `Tf`, if any.
    tf: Option<String>,
    /// Font size selected by `Tf`, if any.
    tfs: Option<Number>,
    /// Rendering mode, set by `Tr`.
    tmode: Number,
}
650
651impl Default for TextState {
652 fn default() -> Self {
653 Self {
654 tc: Number::Integer(0),
655 tw: Number::Integer(0),
656 th: Number::Real(1.0),
657 tl: Number::Integer(0),
658 tf: None,
659 tfs: None,
660 tmode: Number::Integer(0),
661 }
664 }
665}
666
/// Graphics state that `q` saves and `Q` restores.
#[derive(Clone)]
struct GraphicsState {
    /// Current transformation matrix, updated by `cm`.
    ctm: Matrix,
    /// Text-specific parameters.
    text_state: TextState,
    /// Line width, set by `w`.
    line_width: Number,
    /// Line cap style, set by `J`.
    line_cap: Number,
    /// Flatness tolerance, set by `i`.
    flatness: Number,
}
696
697impl Default for GraphicsState {
698 fn default() -> Self {
699 Self {
700 ctm: Matrix::default(), text_state: TextState::default(),
703 line_width: Number::Real(1.0),
704 line_cap: Number::Integer(0), flatness: Number::Real(1.0),
715 }
716 }
717}
718
/// Text extractor over a content stream.
///
/// Combines the stream interpreter with the resources needed to look up the
/// fonts that the stream refers to by name.
pub struct TextContent<'a> {
    /// Resources whose font map is consulted for every text-showing operator.
    resources: Box<Resources>,
    /// Interpreter producing the instructions to extract text from.
    content: Content<'a>,
}
723
724impl<'a> TextContent<'a> {
725 pub fn new(content_bytes: &'a [u8], resources: Box<Resources>) -> Self {
726 Self {
727 resources,
728 content: Content::from(Tokenizer::new(content_bytes, 0)),
729 }
730 }
731
732 pub fn get_text(&mut self, display_char: bool) -> String {
733 let mut output = String::new();
734 let mut tm_prev = self.content.text_object.tm;
735 while let Some(i) = self.content.next() {
736 match i {
737 GraphicsInstruction::Tj(text) => {
738 let font = match self.content.graphic_state.text_state.tf {
739 Some(ref s) => match &self.resources.font {
740 Some(fontmap) => fontmap.0.get(s).unwrap(),
741 None => panic!("Fontmap does not contains the font name {s:?}"),
742 },
743 None => panic!("Text state should have a font set"),
744 };
745
746 if self.content.text_object.tm.get_ty() != tm_prev.get_ty() {
748 output += "\n";
749 }
750 tm_prev = self.content.text_object.tm;
751
752 for c in text {
753 if display_char {
754 output += format!(
755 "{:?}, {:?}, {:?}, {:}\n",
756 c as char,
757 font.subtype,
758 font.base_font,
759 self.content.text_object.tm
760 )
761 .as_str();
762 } else {
763 output.push(c as char);
764 }
765 }
766 }
767 GraphicsInstruction::TJ(text) => {
768 let font = match self.content.graphic_state.text_state.tf {
770 Some(ref s) => match &self.resources.font {
771 Some(fontmap) => fontmap.0.get(s).unwrap(),
772 None => panic!("Fontmap does not contains the font name {s:?}"),
773 },
774 None => panic!("Text state should have a font set"),
775 };
776
777 if self.content.text_object.tm.get_ty() != tm_prev.get_ty() {
779 output += "\n";
780 }
781 tm_prev = self.content.text_object.tm;
782
783 for c in text {
786 match c {
787 ArrayVal::Text(t) => {
788 match &font.to_unicode {
790 Some(to_unicode_cmap) => {
791 let mut hex_iter = t.iter();
792 while let Some(c) = hex_iter.next() {
793 let char_idx = match to_unicode_cmap.is_two_bytes {
794 true => {
795 *c as usize * 256
796 + *hex_iter.next().unwrap() as usize
797 }
798 false => usize::from(*c),
799 };
800 let char = match to_unicode_cmap.cmap.get(&char_idx) {
801 Some(c) => c,
802 None => panic!("CMap does not contain a char with idx {:?}, charmap {:?}", char_idx, to_unicode_cmap.cmap)
803 };
804 if display_char {
806 output += format!(
807 "{:?}, {:?}, {:?}, {:}\n",
808 char,
809 font.subtype,
810 font.base_font,
811 self.content.text_object.tm
812 )
813 .as_str();
814 } else {
815 output.push(*char);
816 }
817 let w0: Number = match font.clone().get_width(*c) {
819 Ok(n) => n,
820 Err(_) => Number::Real(0.0), };
822 let tfs = match &self.content.graphic_state.text_state.tfs {
824 Some(n) => n,
825 None => panic!("Font size should be set before painting a glyph")
826 };
827 let tc =
828 self.content.graphic_state.text_state.tc.clone();
829 let tw =
830 self.content.graphic_state.text_state.tw.clone();
831 let th =
832 self.content.graphic_state.text_state.th.clone();
833 let mut tx = w0.clone() * tfs.clone() + tc.clone();
837 if *c == b' ' {
839 tx = tx + tw.clone();
840 }
841 tx = tx * th;
842 let ty = Number::Real(0.0);
843 self.content.text_object.tm =
848 Matrix::new(
849 1.0,
850 0.0,
851 0.0,
852 1.0,
853 tx.into(),
854 ty.into(),
855 ) * self.content.text_object.tm;
856 }
857 }
858 None => {
860 for c in t {
861 if display_char {
862 output += format!(
863 "{:?}, {:?}, {:?}, {:}\n",
864 c as char,
865 font.subtype,
866 font.base_font,
867 self.content.text_object.tm
868 )
869 .as_str();
870 } else {
871 output.push(c as char);
872 }
873 let w0: Number = match font.clone().get_width(c) {
875 Ok(w) => w,
876 Err(_) => Number::Real(0.0),
877 };
878 let tfs = match &self.content.graphic_state.text_state.tfs {
880 Some(n) => n,
881 None => panic!("Font size should be set before painting a glyph")
882 };
883 let tc =
884 self.content.graphic_state.text_state.tc.clone();
885 let tw =
886 self.content.graphic_state.text_state.tw.clone();
887 let th =
888 self.content.graphic_state.text_state.th.clone();
889 let mut tx = w0.clone() * tfs.clone() + tc.clone();
892 if c == b' ' {
894 tx = tx + tw.clone();
895 }
896 tx = tx * th;
897 let ty = Number::Real(0.0);
898 self.content.text_object.tm =
903 Matrix::new(
904 1.0,
905 0.0,
906 0.0,
907 1.0,
908 tx.into(),
909 ty.into(),
910 ) * self.content.text_object.tm;
911 }
912 }
913 };
914 }
915 ArrayVal::Pos(tj) => {
917 let tfs = match &self.content.graphic_state.text_state.tfs {
918 Some(n) => n,
919 None => {
920 panic!("Font size should be set before painting a glyph")
921 }
922 };
923 let th = self.content.graphic_state.text_state.th.clone();
924 let tx = -tj / Number::Real(1000.0) * tfs.clone() * th.clone();
925 self.content.text_object.tm =
927 Matrix::new(1.0, 0.0, 0.0, 1.0, tx.clone().into(), 0.0)
928 * self.content.text_object.tm;
929 }
944 }
945 }
946 }
947 _ => (),
948 }
949 }
950 output.trim_start_matches(['\n', ' ']).to_string()
951 }
952}
953
// Unit tests for the instruction stream produced by iterating over `Content`.
#[cfg(test)]
mod tests {

    use std::vec;

    use super::*;

    // A minimal text object: BT, TD, Tf, Tj and ET are returned in stream
    // order with their operands, then the stream ends.
    #[test]
    fn test_tokens() {
        let raw = b"BT\n70 50 TD\n/F1 12 Tf\n(Hello, world!) Tj\nET".as_slice();
        let mut stream = Content::from(raw);
        assert_eq!(stream.next(), Some(GraphicsInstruction::BeginText));
        assert_eq!(
            stream.next(),
            Some(GraphicsInstruction::TD(
                Number::Integer(70),
                Number::Integer(50)
            ))
        );
        assert_eq!(
            stream.next(),
            Some(GraphicsInstruction::Tf(
                "F1".to_string(),
                Number::Integer(12)
            ))
        );
        assert_eq!(
            stream.next(),
            Some(GraphicsInstruction::Tj(b"Hello, world!".to_vec()))
        );
        assert_eq!(stream.next(), Some(GraphicsInstruction::EndText));
        assert_eq!(stream.next(), None);
    }

    // A TJ array mixing hex strings and numbers: hex strings become raw
    // bytes, numbers become position adjustments, order is preserved.
    #[test]
    fn test_stream_hexstrings() {
        let raw = b"[<18>14<0D>2<06>7<14>1<04>-4<03>21<02>1<06>-2<04>-4<02>1<0906>]TJ".as_slice();
        let mut stream = Content::from(raw);
        assert_eq!(
            stream.next(),
            Some(GraphicsInstruction::TJ(vec![
                ArrayVal::Text(vec![24]),
                ArrayVal::Pos(Number::Integer(14)),
                ArrayVal::Text(vec![13]),
                ArrayVal::Pos(Number::Integer(2)),
                ArrayVal::Text(vec![6]),
                ArrayVal::Pos(Number::Integer(7)),
                ArrayVal::Text(vec![20]),
                ArrayVal::Pos(Number::Integer(1)),
                ArrayVal::Text(vec![4]),
                ArrayVal::Pos(Number::Integer(-4)),
                ArrayVal::Text(vec![3]),
                ArrayVal::Pos(Number::Integer(21)),
                ArrayVal::Text(vec![2]),
                ArrayVal::Pos(Number::Integer(1)),
                ArrayVal::Text(vec![6]),
                ArrayVal::Pos(Number::Integer(-2)),
                ArrayVal::Text(vec![4]),
                ArrayVal::Pos(Number::Integer(-4)),
                ArrayVal::Text(vec![2]),
                ArrayVal::Pos(Number::Integer(1)),
                ArrayVal::Text(vec![9, 6]),
            ]))
        );
    }

    // An inline dictionary operand before BDC is skipped, and the operators
    // that follow are still parsed with their own operands.
    #[test]
    fn test_tokenizer_dict() {
        let raw = b" /P <</MCID 0>> BDC q\n0.00000887 0 595.25 842 re".as_slice();
        let mut text_stream = Content::from(raw);
        assert_eq!(text_stream.next(), Some(GraphicsInstruction::BDC));
        assert_eq!(text_stream.next(), Some(GraphicsInstruction::LowerQ));
        assert_eq!(
            text_stream.next(),
            Some(GraphicsInstruction::Re(
                Number::Real(0.00000887),
                Number::Integer(0),
                Number::Real(595.25),
                Number::Integer(842)
            ))
        );
    }

    // Operators written without separating whitespace (`Td[` … `]TJ`) and a
    // literal string containing brackets are tokenized correctly.
    #[test]
    fn test_tokenizer_complex() {
        let raw = b"BT\n/F33 8.9664 Tf 54 713.7733 Td[(v0)-525(:=)-525(ld)-525(state[748])-2625(//)-525(load)-525(primes)-525(from)-525(the)-525(trace)-525(activation)-525(record)]TJ".as_slice();
        let mut text_stream = Content::from(raw);
        assert_eq!(text_stream.next(), Some(GraphicsInstruction::BeginText));
        assert_eq!(
            text_stream.next(),
            Some(GraphicsInstruction::Tf(
                "F33".to_string(),
                Number::Real(8.9664)
            ))
        );
        assert_eq!(
            text_stream.next(),
            Some(GraphicsInstruction::Td(
                Number::Integer(54),
                Number::Real(713.7733)
            ))
        );
        assert_eq!(
            text_stream.next(),
            Some(GraphicsInstruction::TJ(vec![
                ArrayVal::Text(b"v0".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b":=".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"ld".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"state[748]".to_vec()),
                ArrayVal::Pos(Number::Integer(-2625)),
                ArrayVal::Text(b"//".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"load".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"primes".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"from".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"the".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"trace".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"activation".to_vec()),
                ArrayVal::Pos(Number::Integer(-525)),
                ArrayVal::Text(b"record".to_vec()),
            ]))
        );
        assert_eq!(text_stream.next(), None);
    }
}