1use std::collections::HashMap;
18
19use rpdfium_core::Name;
20use rpdfium_core::error::PdfError;
21
22use crate::tokenizer::{Token, Tokenizer, is_delimiter, is_whitespace};
23
24#[derive(Debug, Clone, PartialEq)]
26pub enum Operand {
27 Integer(i64),
28 Real(f64),
29 Name(Name),
30 String(Vec<u8>),
31 Array(Vec<Operand>),
32 Boolean(bool),
33 Null,
34}
35
36impl Operand {
37 pub fn as_f32(&self) -> Option<f32> {
39 match self {
40 Operand::Integer(n) => Some(*n as f32),
41 Operand::Real(f) => Some(*f as f32),
42 _ => None,
43 }
44 }
45
46 pub fn as_i64(&self) -> Option<i64> {
48 match self {
49 Operand::Integer(n) => Some(*n),
50 Operand::Real(f) => Some(*f as i64),
51 _ => None,
52 }
53 }
54}
55
56#[derive(Debug, Clone, PartialEq)]
58pub enum Operator {
59 BeginText,
62 EndText,
64 SetFont { name: Name, size: f32 },
66 MoveText { tx: f32, ty: f32 },
68 MoveTextSetLeading { tx: f32, ty: f32 },
70 SetTextMatrix {
72 a: f32,
73 b: f32,
74 c: f32,
75 d: f32,
76 e: f32,
77 f: f32,
78 },
79 NextLine,
81 ShowText { bytes: Vec<u8> },
83 ShowTextArray { elements: Vec<TextArrayElement> },
85 NextLineShowText { bytes: Vec<u8> },
87 SetSpacingShowText {
89 word_space: f32,
90 char_space: f32,
91 bytes: Vec<u8>,
92 },
93 SetCharSpacing { spacing: f32 },
95 SetWordSpacing { spacing: f32 },
97 SetHorizontalScaling { scale: f32 },
99 SetTextLeading { leading: f32 },
101 SetTextRise { rise: f32 },
103 SetTextRenderingMode { mode: i64 },
105
106 SaveState,
109 RestoreState,
111 ConcatMatrix {
113 a: f32,
114 b: f32,
115 c: f32,
116 d: f32,
117 e: f32,
118 f: f32,
119 },
120 SetLineWidth { width: f32 },
122 SetLineCap { cap: i64 },
124 SetLineJoin { join: i64 },
126 SetMiterLimit { limit: f32 },
128 SetDashPattern { array: Vec<f32>, phase: f32 },
130 SetRenderingIntent { intent: Name },
132 SetFlatness { flatness: f32 },
134 SetGraphicsState { name: Name },
136
137 MoveTo { x: f32, y: f32 },
140 LineTo { x: f32, y: f32 },
142 CurveTo {
144 x1: f32,
145 y1: f32,
146 x2: f32,
147 y2: f32,
148 x3: f32,
149 y3: f32,
150 },
151 CurveToInitial { x2: f32, y2: f32, x3: f32, y3: f32 },
153 CurveToFinal { x1: f32, y1: f32, x3: f32, y3: f32 },
155 ClosePath,
157 Rectangle { x: f32, y: f32, w: f32, h: f32 },
159
160 Stroke,
163 CloseAndStroke,
165 Fill,
167 FillObsolete,
169 FillEvenOdd,
171 FillStroke,
173 FillStrokeEvenOdd,
175 CloseFillStroke,
177 CloseFillStrokeEvenOdd,
179 EndPath,
181
182 Clip,
185 ClipEvenOdd,
187
188 SetColorSpaceStroke { name: Name },
191 SetColorSpaceFill { name: Name },
193 SetColorStroke { components: Vec<f32> },
195 SetColorFill { components: Vec<f32> },
197 SetColorStrokeN {
199 components: Vec<f32>,
200 name: Option<Name>,
201 },
202 SetColorFillN {
204 components: Vec<f32>,
205 name: Option<Name>,
206 },
207 SetGrayStroke { gray: f32 },
209 SetGrayFill { gray: f32 },
211 SetRgbStroke { r: f32, g: f32, b: f32 },
213 SetRgbFill { r: f32, g: f32, b: f32 },
215 SetCmykStroke { c: f32, m: f32, y: f32, k: f32 },
217 SetCmykFill { c: f32, m: f32, y: f32, k: f32 },
219
220 PaintXObject { name: Name },
223 PaintShading { name: Name },
225 InlineImage {
227 properties: HashMap<Name, Operand>,
228 data: Vec<u8>,
229 },
230
231 BeginMarkedContent { tag: Name },
234 BeginMarkedContentDict { tag: Name, properties: Operand },
236 EndMarkedContent,
238 MarkedContentPoint { tag: Name },
240 MarkedContentPointDict { tag: Name, properties: Operand },
242
243 SetCharWidth { wx: f32, wy: f32 },
246 SetCacheDevice {
248 wx: f32,
249 wy: f32,
250 llx: f32,
251 lly: f32,
252 urx: f32,
253 ury: f32,
254 },
255
256 BeginCompat,
259 EndCompat,
261
262 Unknown {
264 keyword: Vec<u8>,
265 operands: Vec<Operand>,
266 },
267}
268
/// One entry of the array operand consumed by the `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextArrayElement {
    /// Raw bytes of a string entry.
    Text(Vec<u8>),
    /// A numeric entry, stored as `f32`.
    Adjustment(f32),
}
277
278pub fn tokenize_content_stream(data: &[u8]) -> Result<Vec<Operator>, PdfError> {
283 let mut tok = Tokenizer::new(data);
284 let mut operands: Vec<Operand> = Vec::new();
285 let mut operators = Vec::new();
286
287 loop {
288 let token = match tok.next_token() {
289 Some(Ok(t)) => t,
290 Some(Err(_)) => {
291 continue;
293 }
294 None => break,
295 };
296
297 match token {
298 Token::Integer(n) => operands.push(Operand::Integer(n)),
299 Token::Real(f) => operands.push(Operand::Real(f)),
300 Token::Name(n) => operands.push(Operand::Name(n)),
301 Token::String(s) => operands.push(Operand::String(s.as_bytes().to_vec())),
302 Token::Boolean(b) => operands.push(Operand::Boolean(b)),
303 Token::Null => operands.push(Operand::Null),
304 Token::ArrayStart => {
305 let arr = read_operand_array(&mut tok)?;
307 operands.push(Operand::Array(arr));
308 }
309 Token::Ref(_) => {
310 tracing::warn!("indirect reference in content stream, ignoring");
312 }
313 Token::Keyword(kw) => {
314 let op = build_operator(&kw, &mut operands, &mut tok);
315 operators.push(op);
316 }
317 Token::DictStart | Token::DictEnd | Token::ArrayEnd | Token::Comment(_) => {
318 }
320 }
321 }
322
323 Ok(operators)
324}
325
326fn read_operand_array(tok: &mut Tokenizer<'_>) -> Result<Vec<Operand>, PdfError> {
328 let mut arr = Vec::new();
329 loop {
330 match tok.next_token() {
331 Some(Ok(Token::ArrayEnd)) => return Ok(arr),
332 Some(Ok(Token::Integer(n))) => arr.push(Operand::Integer(n)),
333 Some(Ok(Token::Real(f))) => arr.push(Operand::Real(f)),
334 Some(Ok(Token::String(s))) => {
335 arr.push(Operand::String(s.as_bytes().to_vec()));
336 }
337 Some(Ok(Token::Name(n))) => arr.push(Operand::Name(n)),
338 Some(Ok(Token::Boolean(b))) => arr.push(Operand::Boolean(b)),
339 Some(Ok(Token::Null)) => arr.push(Operand::Null),
340 None => return Ok(arr),
341 _ => continue,
342 }
343 }
344}
345
346fn build_operator(
348 keyword: &[u8],
349 operands: &mut Vec<Operand>,
350 tok: &mut Tokenizer<'_>,
351) -> Operator {
352 let op = match keyword {
353 b"BT" => Operator::BeginText,
355 b"ET" => Operator::EndText,
356 b"Tf" => {
357 let size = pop_f32(operands);
358 let name = pop_name(operands);
359 Operator::SetFont { name, size }
360 }
361 b"Td" => {
362 let ty = pop_f32(operands);
363 let tx = pop_f32(operands);
364 Operator::MoveText { tx, ty }
365 }
366 b"TD" => {
367 let ty = pop_f32(operands);
368 let tx = pop_f32(operands);
369 Operator::MoveTextSetLeading { tx, ty }
370 }
371 b"Tm" => {
372 let f = pop_f32(operands);
373 let e = pop_f32(operands);
374 let d = pop_f32(operands);
375 let c = pop_f32(operands);
376 let b = pop_f32(operands);
377 let a = pop_f32(operands);
378 Operator::SetTextMatrix { a, b, c, d, e, f }
379 }
380 b"T*" => Operator::NextLine,
381 b"Tj" => {
382 let bytes = pop_bytes(operands);
383 Operator::ShowText { bytes }
384 }
385 b"TJ" => {
386 let elements = pop_text_array(operands);
387 Operator::ShowTextArray { elements }
388 }
389 b"'" => {
390 let bytes = pop_bytes(operands);
391 Operator::NextLineShowText { bytes }
392 }
393 b"\"" => {
394 let bytes = pop_bytes(operands);
395 let char_space = pop_f32(operands);
396 let word_space = pop_f32(operands);
397 Operator::SetSpacingShowText {
398 word_space,
399 char_space,
400 bytes,
401 }
402 }
403 b"Tc" => Operator::SetCharSpacing {
404 spacing: pop_f32(operands),
405 },
406 b"Tw" => Operator::SetWordSpacing {
407 spacing: pop_f32(operands),
408 },
409 b"Tz" => Operator::SetHorizontalScaling {
410 scale: pop_f32(operands),
411 },
412 b"TL" => Operator::SetTextLeading {
413 leading: pop_f32(operands),
414 },
415 b"Ts" => Operator::SetTextRise {
416 rise: pop_f32(operands),
417 },
418 b"Tr" => Operator::SetTextRenderingMode {
419 mode: pop_i64(operands),
420 },
421
422 b"q" => Operator::SaveState,
424 b"Q" => Operator::RestoreState,
425 b"cm" => {
426 let f = pop_f32(operands);
427 let e = pop_f32(operands);
428 let d = pop_f32(operands);
429 let c = pop_f32(operands);
430 let b = pop_f32(operands);
431 let a = pop_f32(operands);
432 Operator::ConcatMatrix { a, b, c, d, e, f }
433 }
434 b"w" => Operator::SetLineWidth {
435 width: pop_f32(operands),
436 },
437 b"J" => Operator::SetLineCap {
438 cap: pop_i64(operands),
439 },
440 b"j" => Operator::SetLineJoin {
441 join: pop_i64(operands),
442 },
443 b"M" => Operator::SetMiterLimit {
444 limit: pop_f32(operands),
445 },
446 b"d" => {
447 let phase = pop_f32(operands);
448 let array = pop_f32_array(operands);
449 Operator::SetDashPattern { array, phase }
450 }
451 b"ri" => Operator::SetRenderingIntent {
452 intent: pop_name(operands),
453 },
454 b"i" => Operator::SetFlatness {
455 flatness: pop_f32(operands),
456 },
457 b"gs" => Operator::SetGraphicsState {
458 name: pop_name(operands),
459 },
460
461 b"m" => {
463 let y = pop_f32(operands);
464 let x = pop_f32(operands);
465 Operator::MoveTo { x, y }
466 }
467 b"l" => {
468 let y = pop_f32(operands);
469 let x = pop_f32(operands);
470 Operator::LineTo { x, y }
471 }
472 b"c" => {
473 let y3 = pop_f32(operands);
474 let x3 = pop_f32(operands);
475 let y2 = pop_f32(operands);
476 let x2 = pop_f32(operands);
477 let y1 = pop_f32(operands);
478 let x1 = pop_f32(operands);
479 Operator::CurveTo {
480 x1,
481 y1,
482 x2,
483 y2,
484 x3,
485 y3,
486 }
487 }
488 b"v" => {
489 let y3 = pop_f32(operands);
490 let x3 = pop_f32(operands);
491 let y2 = pop_f32(operands);
492 let x2 = pop_f32(operands);
493 Operator::CurveToInitial { x2, y2, x3, y3 }
494 }
495 b"y" => {
496 let y3 = pop_f32(operands);
497 let x3 = pop_f32(operands);
498 let y1 = pop_f32(operands);
499 let x1 = pop_f32(operands);
500 Operator::CurveToFinal { x1, y1, x3, y3 }
501 }
502 b"h" => Operator::ClosePath,
503 b"re" => {
504 let h = pop_f32(operands);
505 let w = pop_f32(operands);
506 let y = pop_f32(operands);
507 let x = pop_f32(operands);
508 Operator::Rectangle { x, y, w, h }
509 }
510
511 b"S" => Operator::Stroke,
513 b"s" => Operator::CloseAndStroke,
514 b"f" => Operator::Fill,
515 b"F" => Operator::FillObsolete,
516 b"f*" => Operator::FillEvenOdd,
517 b"B" => Operator::FillStroke,
518 b"B*" => Operator::FillStrokeEvenOdd,
519 b"b" => Operator::CloseFillStroke,
520 b"b*" => Operator::CloseFillStrokeEvenOdd,
521 b"n" => Operator::EndPath,
522
523 b"W" => Operator::Clip,
525 b"W*" => Operator::ClipEvenOdd,
526
527 b"CS" => Operator::SetColorSpaceStroke {
529 name: pop_name(operands),
530 },
531 b"cs" => Operator::SetColorSpaceFill {
532 name: pop_name(operands),
533 },
534 b"SC" => Operator::SetColorStroke {
535 components: drain_f32(operands),
536 },
537 b"sc" => Operator::SetColorFill {
538 components: drain_f32(operands),
539 },
540 b"SCN" => {
541 let (components, name) = drain_f32_with_optional_name(operands);
542 Operator::SetColorStrokeN { components, name }
543 }
544 b"scn" => {
545 let (components, name) = drain_f32_with_optional_name(operands);
546 Operator::SetColorFillN { components, name }
547 }
548 b"G" => Operator::SetGrayStroke {
549 gray: pop_f32(operands),
550 },
551 b"g" => Operator::SetGrayFill {
552 gray: pop_f32(operands),
553 },
554 b"RG" => {
555 let b = pop_f32(operands);
556 let g = pop_f32(operands);
557 let r = pop_f32(operands);
558 Operator::SetRgbStroke { r, g, b }
559 }
560 b"rg" => {
561 let b = pop_f32(operands);
562 let g = pop_f32(operands);
563 let r = pop_f32(operands);
564 Operator::SetRgbFill { r, g, b }
565 }
566 b"K" => {
567 let k = pop_f32(operands);
568 let y = pop_f32(operands);
569 let m = pop_f32(operands);
570 let c = pop_f32(operands);
571 Operator::SetCmykStroke { c, m, y, k }
572 }
573 b"k" => {
574 let k = pop_f32(operands);
575 let y = pop_f32(operands);
576 let m = pop_f32(operands);
577 let c = pop_f32(operands);
578 Operator::SetCmykFill { c, m, y, k }
579 }
580
581 b"Do" => Operator::PaintXObject {
583 name: pop_name(operands),
584 },
585
586 b"sh" => Operator::PaintShading {
588 name: pop_name(operands),
589 },
590
591 b"BI" => {
593 let (properties, data) = read_inline_image(tok);
594 Operator::InlineImage { properties, data }
595 }
596
597 b"BMC" => Operator::BeginMarkedContent {
599 tag: pop_name(operands),
600 },
601 b"BDC" => {
602 let properties = operands.pop().unwrap_or(Operand::Null);
603 let tag = pop_name(operands);
604 Operator::BeginMarkedContentDict { tag, properties }
605 }
606 b"EMC" => Operator::EndMarkedContent,
607 b"MP" => Operator::MarkedContentPoint {
608 tag: pop_name(operands),
609 },
610 b"DP" => {
611 let properties = operands.pop().unwrap_or(Operand::Null);
612 let tag = pop_name(operands);
613 Operator::MarkedContentPointDict { tag, properties }
614 }
615
616 b"d0" => {
618 let wy = pop_f32(operands);
619 let wx = pop_f32(operands);
620 Operator::SetCharWidth { wx, wy }
621 }
622 b"d1" => {
623 let ury = pop_f32(operands);
624 let urx = pop_f32(operands);
625 let lly = pop_f32(operands);
626 let llx = pop_f32(operands);
627 let wy = pop_f32(operands);
628 let wx = pop_f32(operands);
629 Operator::SetCacheDevice {
630 wx,
631 wy,
632 llx,
633 lly,
634 urx,
635 ury,
636 }
637 }
638
639 b"BX" => Operator::BeginCompat,
641 b"EX" => Operator::EndCompat,
642
643 _ => {
644 let all_operands = std::mem::take(operands);
645 return Operator::Unknown {
646 keyword: keyword.to_vec(),
647 operands: all_operands,
648 };
649 }
650 };
651
652 operands.clear();
653 op
654}
655
656fn expand_inline_key(abbr: &[u8]) -> Name {
658 match abbr {
659 b"BPC" => Name::from("BitsPerComponent"),
660 b"CS" => Name::from("ColorSpace"),
661 b"D" => Name::from("Decode"),
662 b"DP" => Name::from("DecodeParms"),
663 b"F" => Name::from("Filter"),
664 b"H" => Name::from("Height"),
665 b"IM" => Name::from("ImageMask"),
666 b"I" => Name::from("Interpolate"),
667 b"W" => Name::from("Width"),
668 b"L" => Name::from("Length"),
669 _ => Name::from_bytes(abbr.to_vec()),
670 }
671}
672
673fn expand_inline_name_value(name: &Name) -> Name {
675 match name.as_bytes() {
676 b"G" => Name::from("DeviceGray"),
678 b"RGB" => Name::from("DeviceRGB"),
679 b"CMYK" => Name::from("DeviceCMYK"),
680 b"I" => Name::from("Indexed"),
681 b"AHx" => Name::from("ASCIIHexDecode"),
683 b"A85" => Name::from("ASCII85Decode"),
684 b"LZW" => Name::from("LZWDecode"),
685 b"Fl" => Name::from("FlateDecode"),
686 b"RL" => Name::from("RunLengthDecode"),
687 b"CCF" => Name::from("CCITTFaxDecode"),
688 b"DCT" => Name::from("DCTDecode"),
689 _ => name.clone(),
690 }
691}
692
693fn expand_inline_value(operand: Operand) -> Operand {
695 match operand {
696 Operand::Name(n) => Operand::Name(expand_inline_name_value(&n)),
697 Operand::Array(arr) => Operand::Array(arr.into_iter().map(expand_inline_value).collect()),
698 other => other,
699 }
700}
701
702fn read_inline_image(tok: &mut Tokenizer<'_>) -> (HashMap<Name, Operand>, Vec<u8>) {
704 let empty = (HashMap::new(), Vec::new());
705
706 let mut properties = HashMap::new();
708 loop {
709 match tok.next_token() {
710 Some(Ok(Token::Keyword(ref kw))) if kw == b"ID" => break,
711 Some(Ok(Token::Name(key))) => {
712 let expanded_key = expand_inline_key(key.as_bytes());
713 let value = match tok.next_token() {
715 Some(Ok(Token::Integer(n))) => Operand::Integer(n),
716 Some(Ok(Token::Real(f))) => Operand::Real(f),
717 Some(Ok(Token::Name(n))) => Operand::Name(n),
718 Some(Ok(Token::String(s))) => Operand::String(s.as_bytes().to_vec()),
719 Some(Ok(Token::Boolean(b))) => Operand::Boolean(b),
720 Some(Ok(Token::Null)) => Operand::Null,
721 Some(Ok(Token::ArrayStart)) => match read_operand_array(tok) {
722 Ok(arr) => Operand::Array(arr),
723 Err(_) => return empty,
724 },
725 Some(Ok(Token::Keyword(ref kw))) if kw == b"ID" => {
726 properties.insert(expanded_key, Operand::Null);
728 break;
729 }
730 _ => return empty,
731 };
732 let value = expand_inline_value(value);
733 properties.insert(expanded_key, value);
734 }
735 None => return empty,
736 _ => continue,
737 }
738 }
739
740 let source = tok.source();
742 let pos = tok.position();
743 if pos >= source.len() {
744 return (properties, Vec::new());
745 }
746 let data_start = pos + 1;
748
749 let mut i = data_start;
751 let data;
752 loop {
753 if i + 2 >= source.len() {
754 data = source[data_start..source.len()].to_vec();
756 tok.set_position(source.len());
757 return (properties, data);
758 }
759 if is_whitespace(source[i]) && source[i + 1] == b'E' && source[i + 2] == b'I' {
761 let after_ei = i + 3;
763 if after_ei >= source.len()
764 || is_whitespace(source[after_ei])
765 || is_delimiter(source[after_ei])
766 {
767 data = source[data_start..i].to_vec();
768 tok.set_position(after_ei);
769 return (properties, data);
770 }
771 }
772 i += 1;
773 }
774}
775
776fn pop_f32(operands: &mut Vec<Operand>) -> f32 {
779 operands.pop().and_then(|op| op.as_f32()).unwrap_or(0.0)
780}
781
782fn pop_i64(operands: &mut Vec<Operand>) -> i64 {
783 operands.pop().and_then(|op| op.as_i64()).unwrap_or(0)
784}
785
786fn pop_name(operands: &mut Vec<Operand>) -> Name {
787 match operands.pop() {
788 Some(Operand::Name(n)) => n,
789 _ => Name::from_bytes(Vec::new()),
790 }
791}
792
793fn pop_bytes(operands: &mut Vec<Operand>) -> Vec<u8> {
794 match operands.pop() {
795 Some(Operand::String(b)) => b,
796 _ => Vec::new(),
797 }
798}
799
800fn pop_text_array(operands: &mut Vec<Operand>) -> Vec<TextArrayElement> {
801 match operands.pop() {
802 Some(Operand::Array(arr)) => arr
803 .into_iter()
804 .map(|op| match op {
805 Operand::String(b) => TextArrayElement::Text(b),
806 Operand::Integer(n) => TextArrayElement::Adjustment(n as f32),
807 Operand::Real(f) => TextArrayElement::Adjustment(f as f32),
808 _ => TextArrayElement::Adjustment(0.0),
809 })
810 .collect(),
811 _ => Vec::new(),
812 }
813}
814
815fn pop_f32_array(operands: &mut Vec<Operand>) -> Vec<f32> {
816 match operands.pop() {
817 Some(Operand::Array(arr)) => arr.iter().filter_map(|op| op.as_f32()).collect(),
818 _ => Vec::new(),
819 }
820}
821
822fn drain_f32(operands: &mut Vec<Operand>) -> Vec<f32> {
823 let result: Vec<f32> = operands.iter().filter_map(|op| op.as_f32()).collect();
824 operands.clear();
825 result
826}
827
828fn drain_f32_with_optional_name(operands: &mut Vec<Operand>) -> (Vec<f32>, Option<Name>) {
829 let mut name = None;
830 let mut components = Vec::new();
831
832 for op in operands.drain(..) {
833 match op {
834 Operand::Name(n) => name = Some(n),
835 Operand::Integer(n) => components.push(n as f32),
836 Operand::Real(f) => components.push(f as f32),
837 _ => {}
838 }
839 }
840
841 (components, name)
842}
843
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `data`, panicking if tokenization reports an error.
    fn ops_of(data: &[u8]) -> Vec<Operator> {
        tokenize_content_stream(data).unwrap()
    }

    /// Concatenates the pieces of an inline-image test stream.
    fn stream_of(header: &[u8], payload: &[u8], trailer: &[u8]) -> Vec<u8> {
        [header, payload, trailer].concat()
    }

    #[test]
    fn test_tokenize_simple_text() {
        let ops = ops_of(b"BT /F1 12 Tf 100 700 Td (Hello World) Tj ET");

        assert!(matches!(
            &ops[..],
            [
                Operator::BeginText,
                Operator::SetFont { .. },
                Operator::MoveText { .. },
                Operator::ShowText { .. },
                Operator::EndText,
                ..
            ]
        ));
    }

    #[test]
    fn test_tokenize_path_operators() {
        let ops = ops_of(b"100 200 300 400 re f S");

        if let Operator::Rectangle { x, y, w, h } = &ops[0] {
            assert_eq!((*x, *y, *w, *h), (100.0, 200.0, 300.0, 400.0));
        } else {
            panic!("expected Rectangle");
        }
        assert!(matches!(ops[1], Operator::Fill));
        assert!(matches!(ops[2], Operator::Stroke));
    }

    #[test]
    fn test_tokenize_graphics_state() {
        let ops = ops_of(b"q 1 0 0 1 100 200 cm Q");

        assert!(matches!(
            &ops[..],
            [
                Operator::SaveState,
                Operator::ConcatMatrix { .. },
                Operator::RestoreState,
                ..
            ]
        ));
    }

    #[test]
    fn test_tokenize_color_operators() {
        let ops = ops_of(b"1 0 0 rg 0.5 G");

        if let Operator::SetRgbFill { r, g, b } = &ops[0] {
            assert_eq!((*r, *g, *b), (1.0, 0.0, 0.0));
        } else {
            panic!("expected SetRgbFill");
        }
        if let Operator::SetGrayStroke { gray } = &ops[1] {
            assert_eq!(*gray, 0.5);
        } else {
            panic!("expected SetGrayStroke");
        }
    }

    #[test]
    fn test_tokenize_text_array() {
        let ops = ops_of(b"[(Hello) -50 (World)] TJ");

        if let Operator::ShowTextArray { elements } = &ops[0] {
            assert_eq!(elements.len(), 3);
            assert!(matches!(&elements[0], TextArrayElement::Text(b) if b == b"Hello"));
            assert!(matches!(
                &elements[1],
                TextArrayElement::Adjustment(a) if *a == -50.0
            ));
            assert!(matches!(&elements[2], TextArrayElement::Text(b) if b == b"World"));
        } else {
            panic!("expected ShowTextArray");
        }
    }

    #[test]
    fn test_tokenize_marked_content() {
        let ops = ops_of(b"/OC BMC (Hello) Tj EMC");

        assert!(matches!(
            &ops[..],
            [
                Operator::BeginMarkedContent { .. },
                Operator::ShowText { .. },
                Operator::EndMarkedContent,
                ..
            ]
        ));
    }

    #[test]
    fn test_tokenize_xobject() {
        let ops = ops_of(b"/Im0 Do");

        if let Operator::PaintXObject { name } = &ops[0] {
            assert_eq!(name.as_bytes(), b"Im0");
        } else {
            panic!("expected PaintXObject");
        }
    }

    #[test]
    fn test_tokenize_unknown_operator() {
        let ops = ops_of(b"42 ZZ");

        if let Operator::Unknown { keyword, operands } = &ops[0] {
            assert_eq!(keyword, b"ZZ");
            assert_eq!(operands.len(), 1);
        } else {
            panic!("expected Unknown operator");
        }
    }

    #[test]
    fn test_tokenize_empty_stream() {
        assert!(ops_of(b"").is_empty());
    }

    #[test]
    #[allow(clippy::approx_constant)]
    fn test_operand_as_f32() {
        assert_eq!(Operand::Integer(42).as_f32(), Some(42.0));
        assert_eq!(Operand::Real(3.14).as_f32(), Some(3.14));
        assert_eq!(Operand::Null.as_f32(), None);
    }

    #[test]
    fn test_operand_as_i64() {
        assert_eq!(Operand::Integer(42).as_i64(), Some(42));
        assert_eq!(Operand::Null.as_i64(), None);
    }

    #[test]
    fn test_set_font_operator() {
        let ops = ops_of(b"BT /F1 12 Tf ET");

        if let Operator::SetFont { name, size } = &ops[1] {
            assert_eq!(name.as_bytes(), b"F1");
            assert_eq!(*size, 12.0);
        } else {
            panic!("expected SetFont");
        }
    }

    #[test]
    fn test_set_dash_pattern() {
        let ops = ops_of(b"[3 5] 0 d");

        if let Operator::SetDashPattern { array, phase } = &ops[0] {
            assert_eq!(array, &[3.0, 5.0]);
            assert_eq!(*phase, 0.0);
        } else {
            panic!("expected SetDashPattern");
        }
    }

    #[test]
    fn test_inline_image_basic() {
        let stream = stream_of(b"BI /W 10 /H 10 /BPC 8 /CS /G ID ", &[0xFF; 10], b" EI");
        let ops = ops_of(&stream);
        assert_eq!(ops.len(), 1);

        if let Operator::InlineImage { properties, data } = &ops[0] {
            let expected = [
                ("Width", Operand::Integer(10)),
                ("Height", Operand::Integer(10)),
                ("BitsPerComponent", Operand::Integer(8)),
                ("ColorSpace", Operand::Name(Name::from("DeviceGray"))),
            ];
            for (key, value) in expected.iter() {
                assert_eq!(properties.get(&Name::from(*key)), Some(value));
            }
            assert_eq!(data.len(), 10);
            assert!(data.iter().all(|&b| b == 0xFF));
        } else {
            panic!("expected InlineImage");
        }
    }

    #[test]
    fn test_inline_image_abbreviation_expansion() {
        let stream = stream_of(b"BI /W 4 /H 4 /CS /RGB /F /Fl ID ", &[0xAA; 4], b" EI");
        let ops = ops_of(&stream);

        if let Operator::InlineImage { properties, .. } = &ops[0] {
            assert_eq!(
                properties.get(&Name::from("ColorSpace")),
                Some(&Operand::Name(Name::from("DeviceRGB")))
            );
            assert_eq!(
                properties.get(&Name::from("Filter")),
                Some(&Operand::Name(Name::from("FlateDecode")))
            );
        } else {
            panic!("expected InlineImage");
        }
    }

    #[test]
    fn test_inline_image_binary_data_extraction() {
        // The payload spells "EIEI": an `EI` that is not preceded by
        // whitespace must not terminate the image data.
        let stream = stream_of(
            b"BI /W 2 /H 2 /BPC 8 /CS /G ID ",
            &[0x45, 0x49, 0x45, 0x49],
            b" EI",
        );
        let ops = ops_of(&stream);

        if let Operator::InlineImage { data, .. } = &ops[0] {
            assert_eq!(data, &[0x45, 0x49, 0x45, 0x49]);
        } else {
            panic!("expected InlineImage");
        }
    }

    #[test]
    fn test_inline_image_tokenizer_resumes_after_ei() {
        let stream = stream_of(
            b"BI /W 1 /H 1 /BPC 8 /CS /G ID ",
            &[0xAB],
            b"\nEI\n100 200 m",
        );
        let ops = ops_of(&stream);

        assert_eq!(ops.len(), 2);
        assert!(matches!(&ops[0], Operator::InlineImage { .. }));
        assert!(matches!(&ops[1], Operator::MoveTo { x, y } if *x == 100.0 && *y == 200.0));
    }

    #[test]
    fn test_tokenize_paint_shading() {
        let ops = ops_of(b"/Sh0 sh");
        assert_eq!(ops.len(), 1);

        if let Operator::PaintShading { name } = &ops[0] {
            assert_eq!(name.as_bytes(), b"Sh0");
        } else {
            panic!("expected PaintShading");
        }
    }

    #[test]
    fn test_tokenize_d0_operator() {
        let ops = ops_of(b"500 0 d0");
        assert_eq!(ops.len(), 1);

        if let Operator::SetCharWidth { wx, wy } = &ops[0] {
            assert_eq!(*wx, 500.0);
            assert_eq!(*wy, 0.0);
        } else {
            panic!("expected SetCharWidth");
        }
    }

    #[test]
    fn test_tokenize_d1_operator() {
        let ops = ops_of(b"500 0 10 -20 400 700 d1");
        assert_eq!(ops.len(), 1);

        if let Operator::SetCacheDevice {
            wx,
            wy,
            llx,
            lly,
            urx,
            ury,
        } = &ops[0]
        {
            assert_eq!((*wx, *wy), (500.0, 0.0));
            assert_eq!((*llx, *lly), (10.0, -20.0));
            assert_eq!((*urx, *ury), (400.0, 700.0));
        } else {
            panic!("expected SetCacheDevice");
        }
    }

    #[test]
    fn test_d0_in_char_proc_stream() {
        let ops = ops_of(b"500 0 d0 100 200 m 300 400 l S");

        assert_eq!(ops.len(), 4);
        assert!(matches!(ops[0], Operator::SetCharWidth { .. }));
        assert!(matches!(ops[1], Operator::MoveTo { .. }));
        assert!(matches!(ops[2], Operator::LineTo { .. }));
        assert!(matches!(ops[3], Operator::Stroke));
    }

    #[test]
    fn test_find_key_abbreviation() {
        let cases: &[(&[u8], &str)] = &[
            // Known abbreviations.
            (b"BPC", "BitsPerComponent"),
            (b"W", "Width"),
            (b"H", "Height"),
            (b"CS", "ColorSpace"),
            (b"F", "Filter"),
            (b"D", "Decode"),
            (b"DP", "DecodeParms"),
            (b"IM", "ImageMask"),
            (b"I", "Interpolate"),
            (b"L", "Length"),
            // Empty and unknown keys pass through unchanged.
            (b"", ""),
            (b"NoInList", "NoInList"),
            // A key that merely starts with an abbreviation is not expanded.
            (b"WW", "WW"),
        ];
        for &(abbr, full) in cases {
            assert_eq!(expand_inline_key(abbr), Name::from(full));
        }
    }

    #[test]
    fn test_find_value_abbreviation() {
        let cases: &[(&str, &str)] = &[
            // Known abbreviations.
            ("G", "DeviceGray"),
            ("RGB", "DeviceRGB"),
            ("CMYK", "DeviceCMYK"),
            ("DCT", "DCTDecode"),
            ("Fl", "FlateDecode"),
            ("AHx", "ASCIIHexDecode"),
            ("A85", "ASCII85Decode"),
            ("LZW", "LZWDecode"),
            ("RL", "RunLengthDecode"),
            ("CCF", "CCITTFaxDecode"),
            ("I", "Indexed"),
            // Empty and unknown names pass through unchanged.
            ("", ""),
            ("NoInList", "NoInList"),
            // A name that merely starts with an abbreviation is not expanded.
            ("II", "II"),
        ];
        for &(abbr, full) in cases {
            assert_eq!(
                expand_inline_name_value(&Name::from(abbr)),
                Name::from(full)
            );
        }
    }
}