1#[allow(missing_docs)]
34pub mod ops;
35
36use crate::content::ops::TypedInstruction;
37use crate::object::Stream;
38use crate::object::dict::InlineImageDict;
39use crate::object::dict::keys::{
40 ASCII85_DECODE_ABBREVIATION, BITS_PER_COMPONENT, BPC, COLORSPACE, CS, F, FILTER, H, HEIGHT, IM,
41 IMAGE_MASK, W, WIDTH,
42};
43use crate::object::name::{Name, skip_name_like};
44use crate::object::{Array, Number, Object, ObjectLike};
45use crate::reader::Reader;
46use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
47use core::fmt::{Debug, Formatter};
48use core::ops::Deref;
49use log::warn;
50use smallvec::SmallVec;
51
/// Inline capacity of the `SmallVec` that backs `Stack` and of the vectors returned by
/// `Stack::get_all`; more operands than this no longer fit into the inline storage.
const OPERANDS_THRESHOLD: usize = 6;
56
57fn compute_raw_inline_image_size(dict: &crate::object::dict::Dict<'_>) -> Option<usize> {
63 let has_filter = dict.get::<Name>(F).is_some()
65 || dict.get::<Name>(FILTER).is_some()
66 || dict.get::<Array>(F).is_some()
67 || dict.get::<Array>(FILTER).is_some();
68 if has_filter {
69 return None;
70 }
71
72 let is_image_mask =
74 dict.get::<bool>(IM).unwrap_or(false) || dict.get::<bool>(IMAGE_MASK).unwrap_or(false);
75
76 let w = dict
77 .get::<Number>(W)
78 .or_else(|| dict.get::<Number>(WIDTH))?
79 .as_f64() as usize;
80 let h = dict
81 .get::<Number>(H)
82 .or_else(|| dict.get::<Number>(HEIGHT))?
83 .as_f64() as usize;
84
85 let (bpc, components): (usize, usize) = if is_image_mask {
86 (1, 1)
87 } else {
88 let bpc = dict
89 .get::<Number>(BPC)
90 .or_else(|| dict.get::<Number>(BITS_PER_COMPONENT))
91 .map(|n| n.as_f64() as usize)
92 .unwrap_or(8);
93 let cs_name: Option<Vec<u8>> = dict
94 .get::<Name>(CS)
95 .map(|n| n.as_ref().to_vec())
96 .or_else(|| dict.get::<Name>(COLORSPACE).map(|n| n.as_ref().to_vec()));
97 let components = match cs_name.as_deref() {
98 Some(b"G") | Some(b"DeviceGray") | Some(b"I") | Some(b"Indexed") => 1,
99 Some(b"RGB") | Some(b"DeviceRGB") => 3,
100 Some(b"CMYK") | Some(b"DeviceCMYK") => 4,
101 _ => return None, };
103 (bpc, components)
104 };
105
106 let bits_per_row = w * bpc * components;
108 let stride = bits_per_row.div_ceil(8);
109 Some(h * stride)
110}
111
112fn find_a85_inline_image_end(
122 stream_data: &[u8],
123 dict: &crate::object::dict::Dict<'_>,
124) -> Option<(usize, usize)> {
125 let outermost: Option<Vec<u8>> = dict
127 .get::<Name>(F)
128 .map(|n| n.as_ref().to_vec())
129 .or_else(|| dict.get::<Name>(FILTER).map(|n| n.as_ref().to_vec()))
130 .or_else(|| {
131 dict.get::<Array>(F)
132 .and_then(|a| a.iter::<Name>().next())
133 .map(|n| n.as_ref().to_vec())
134 })
135 .or_else(|| {
136 dict.get::<Array>(FILTER)
137 .and_then(|a| a.iter::<Name>().next())
138 .map(|n| n.as_ref().to_vec())
139 });
140
141 let is_a85 = matches!(
142 outermost.as_deref(),
143 Some(ASCII85_DECODE_ABBREVIATION) | Some(b"ASCII85Decode")
144 );
145 if !is_a85 {
146 return None;
147 }
148
149 let mut i = 0;
151 while i + 2 <= stream_data.len() {
152 if stream_data[i] == b'~' && stream_data[i + 1] == b'>' {
153 let eos_end = i + 2;
154 let mut ei_start = eos_end;
155 while ei_start < stream_data.len()
156 && matches!(stream_data[ei_start], b' ' | b'\t' | b'\n' | b'\r' | 0x0C)
157 {
158 ei_start += 1;
159 }
160 if stream_data.get(ei_start..ei_start + 2) == Some(b"EI") {
161 let after_ei = ei_start + 2;
162 let ei_delimited = after_ei >= stream_data.len()
163 || matches!(stream_data[after_ei], b' ' | b'\t' | b'\n' | b'\r' | 0x0C);
164 if ei_delimited {
165 return Some((eos_end, after_ei));
166 }
167 }
168 }
170 i += 1;
171 }
172 None
173}
174
175impl Debug for Operator {
176 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
177 write!(f, "{}", self.0.as_str())
178 }
179}
180
181#[derive(Clone, PartialEq)]
183pub struct Operator(Name);
184
185impl Deref for Operator {
186 type Target = [u8];
187
188 fn deref(&self) -> &Self::Target {
189 self.0.as_ref()
190 }
191}
192
193impl Skippable for Operator {
194 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
195 skip_name_like(r, false).map(|_| ())
196 }
197}
198
199impl Readable<'_> for Operator {
200 fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
201 let start = r.offset();
202 skip_name_like(r, false)?;
203 let end = r.offset();
204 let data = r.range(start..end)?;
205
206 if data.is_empty() {
207 return None;
208 }
209
210 Some(Self(Name::new(data)))
211 }
212}
213
214#[derive(Clone)]
216pub struct UntypedIter<'a> {
217 reader: Reader<'a>,
218 stack: Stack<'a>,
219}
220
221impl<'a> UntypedIter<'a> {
222 pub fn new(data: &'a [u8]) -> Self {
224 Self {
225 reader: Reader::new(data),
226 stack: Stack::new(),
227 }
228 }
229
230 pub fn empty() -> Self {
232 Self {
233 reader: Reader::new(&[]),
234 stack: Stack::new(),
235 }
236 }
237}
238
239impl<'a> Iterator for UntypedIter<'a> {
240 type Item = Instruction<'a>;
241
242 fn next(&mut self) -> Option<Self::Item> {
243 self.stack.clear();
244
245 self.reader.skip_white_spaces_and_comments();
246
247 while !self.reader.at_end() {
248 if matches!(
250 self.reader.peek_byte()?,
251 b'/' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'[' | b'<' | b'('
252 ) {
253 if let Some(object) = self.reader.read_without_context::<Object<'_>>() {
261 self.stack.push(object);
262 } else if self.reader.read_without_context::<Operator>().is_some() {
263 self.stack.clear();
264 } else {
265 return None;
266 }
267 } else {
268 let operator = match self.reader.read_without_context::<Operator>() {
269 Some(o) => o,
270 None => {
271 warn!("failed to read operator in content stream");
272
273 self.reader.jump_to_end();
274 return None;
275 }
276 };
277
278 if operator.as_ref() == b"BI" {
280 let inline_dict = self.reader.read_without_context::<InlineImageDict<'_>>()?;
282 let dict = inline_dict.get_dict().clone();
283
284 self.reader.read_white_space()?;
286
287 let stream_data = self.reader.tail()?;
288 let start_offset = self.reader.offset();
289
290 if let Some((image_end, advance)) =
294 find_a85_inline_image_end(stream_data, &dict)
295 {
296 let image_data = &stream_data[..image_end];
297 let stream = Stream::new(image_data, dict.clone(), u64::MAX);
299 self.stack.push(Object::Stream(stream));
300 self.reader.read_bytes(advance)?;
301 self.reader.skip_white_spaces();
302
303 return Some(Instruction {
304 operands: core::mem::take(&mut self.stack),
305 operator,
306 });
307 }
308
309 if let Some(raw_size) = compute_raw_inline_image_size(&dict)
315 && stream_data.len() >= raw_size
316 {
317 let image_data = &stream_data[..raw_size];
318 let stream = Stream::new(image_data, dict.clone(), u64::MAX);
320 self.stack.push(Object::Stream(stream));
321 self.reader.read_bytes(raw_size)?;
323 self.reader.skip_white_spaces();
324 let _ = self.reader.read_bytes(2);
326 self.reader.skip_white_spaces();
327
328 return Some(Instruction {
329 operands: core::mem::take(&mut self.stack),
330 operator,
331 });
332 }
333
334 'outer: while let Some(bytes) = self.reader.peek_bytes(2) {
335 if bytes == b"EI" {
336 let end_offset = self.reader.offset() - start_offset;
337 let image_data = &stream_data[..end_offset];
338
339 let stream = Stream::new(image_data, dict.clone(), u64::MAX);
341
342 let tail = &self.reader.tail()?[2..];
349 let mut find_reader = Reader::new(tail);
350
351 while let Some(bytes) = find_reader.peek_bytes(2) {
352 if bytes == b"EI" {
353 let analyze_data = &tail;
354
355 if analyze_data.iter().any(|c| !c.is_ascii()) {
358 self.reader.read_bytes(2)?;
359 continue 'outer;
360 }
361
362 let iter = TypedIter::new(tail);
371 let mut found = false;
372
373 for (counter, op) in iter.enumerate() {
374 if counter >= 20 {
380 found = true;
381 break;
382 }
383
384 if matches!(
385 op,
386 TypedInstruction::NextLineAndShowText(_)
387 | TypedInstruction::ShowText(_)
388 | TypedInstruction::ShowTexts(_)
389 | TypedInstruction::ShowTextWithParameters(_)
390 ) {
391 found = true;
394 break;
395 }
396 }
397
398 if !found {
399 self.reader.read_bytes(2)?;
402 continue 'outer;
403 }
404 } else if bytes == b"BI" {
405 let mut cloned = find_reader.clone();
408 cloned.read_bytes(2)?;
409 if cloned
410 .read_without_context::<InlineImageDict<'_>>()
411 .is_some()
412 {
413 break;
414 }
415 }
416
417 find_reader.read_byte()?;
418 }
419
420 self.stack.push(Object::Stream(stream));
421
422 self.reader.read_bytes(2)?;
423 self.reader.skip_white_spaces();
424
425 break;
426 } else {
427 self.reader.read_byte()?;
428 }
429 }
430 }
431
432 return Some(Instruction {
433 operands: core::mem::take(&mut self.stack),
434 operator,
435 });
436 }
437
438 self.reader.skip_white_spaces_and_comments();
439 }
440
441 None
442 }
443}
444
445#[derive(Clone)]
448pub struct TypedIter<'a> {
449 untyped: UntypedIter<'a>,
450}
451
452impl<'a> TypedIter<'a> {
453 pub fn new(data: &'a [u8]) -> Self {
455 Self {
456 untyped: UntypedIter::new(data),
457 }
458 }
459
460 pub(crate) fn from_untyped(untyped: UntypedIter<'a>) -> Self {
461 Self { untyped }
462 }
463}
464
465impl<'a> Iterator for TypedIter<'a> {
466 type Item = TypedInstruction<'a>;
467
468 fn next(&mut self) -> Option<Self::Item> {
469 let op = &self.untyped.next()?;
470 match TypedInstruction::dispatch(op) {
471 Some(op) => Some(op),
472 None => {
477 if [
478 &b"m"[..],
479 &b"l"[..],
480 &b"c"[..],
481 &b"v"[..],
482 &b"y"[..],
483 &b"h"[..],
484 &b"re"[..],
485 ]
486 .contains(&op.operator.0.deref())
487 {
488 None
489 } else {
490 Some(TypedInstruction::Fallback(op.operator.clone()))
491 }
492 }
493 }
494 }
495}
496
497pub struct Instruction<'a> {
499 pub operands: Stack<'a>,
501 pub operator: Operator,
503}
504
505impl<'a> Instruction<'a> {
506 pub fn operands(self) -> OperandIterator<'a> {
508 OperandIterator::new(self.operands)
509 }
510}
511
512#[derive(Debug, Clone, PartialEq)]
514pub struct Stack<'a>(SmallVec<[Object<'a>; OPERANDS_THRESHOLD]>);
515
516impl<'a> Default for Stack<'a> {
517 fn default() -> Self {
518 Self::new()
519 }
520}
521
522impl<'a> Stack<'a> {
523 pub fn new() -> Self {
525 Self(SmallVec::new())
526 }
527
528 fn push(&mut self, operand: Object<'a>) {
529 self.0.push(operand);
530 }
531
532 fn clear(&mut self) {
533 self.0.clear();
534 }
535
536 fn len(&self) -> usize {
537 self.0.len()
538 }
539
540 fn get<T>(&self, index: usize) -> Option<T>
541 where
542 T: ObjectLike<'a>,
543 {
544 self.0.get(index).and_then(|e| e.clone().cast::<T>())
545 }
546
547 fn get_all<T>(&self) -> Option<SmallVec<[T; OPERANDS_THRESHOLD]>>
548 where
549 T: ObjectLike<'a>,
550 {
551 let mut operands = SmallVec::new();
552
553 for op in &self.0 {
554 let converted = op.clone().cast::<T>()?;
555 operands.push(converted);
556 }
557
558 Some(operands)
559 }
560}
561
562pub struct OperandIterator<'a> {
564 stack: Stack<'a>,
565 cur_index: usize,
566}
567
568impl<'a> OperandIterator<'a> {
569 fn new(stack: Stack<'a>) -> Self {
570 Self {
571 stack,
572 cur_index: 0,
573 }
574 }
575}
576
577impl<'a> Iterator for OperandIterator<'a> {
578 type Item = Object<'a>;
579
580 fn next(&mut self) -> Option<Self::Item> {
581 if let Some(item) = self.stack.get::<Object<'a>>(self.cur_index) {
582 self.cur_index += 1;
583
584 Some(item)
585 } else {
586 None
587 }
588 }
589}
590
591pub(crate) trait OperatorTrait<'a>
592where
593 Self: Sized + Into<TypedInstruction<'a>> + TryFrom<TypedInstruction<'a>>,
594{
595 const OPERATOR: &'static str;
596
597 fn from_stack(stack: &Stack<'a>) -> Option<Self>;
598}
599
mod macros {
    // Implements `OperatorTrait` plus the conversions to and from `TypedInstruction` for
    // the type `$t`. `$e` is the operator name, `$n` the expected operand count
    // (`u8::MAX as usize` disables the count check) and `$body` a closure that builds the
    // value from the stack.
    macro_rules! op_impl {
        ($t:ident $(<$l:lifetime>),*, $e:expr, $n:expr, $body:expr) => {
            impl<'a> OperatorTrait<'a> for $t$(<$l>),* {
                const OPERATOR: &'static str = $e;

                fn from_stack(stack: &Stack<'a>) -> Option<Self> {
                    let expected: usize = $n;
                    let any_count_allowed = expected == u8::MAX as usize;

                    // A mismatching operand count is only reported, not rejected.
                    if !any_count_allowed && stack.len() != expected {
                        warn!("wrong stack length {} for operator {}, expected {}", stack.len(), Self::OPERATOR, expected);
                    }

                    let converted = $body(stack);
                    if converted.is_none() {
                        warn!("failed to convert operands for operator {}", Self::OPERATOR);
                    }

                    converted
                }
            }

            impl<'a> From<$t$(<$l>),*> for TypedInstruction<'a> {
                fn from(value: $t$(<$l>),*) -> Self {
                    TypedInstruction::$t(value)
                }
            }

            impl<'a> TryFrom<TypedInstruction<'a>> for $t$(<$l>),* {
                type Error = ();

                fn try_from(value: TypedInstruction<'a>) -> core::result::Result<Self, Self::Error> {
                    if let TypedInstruction::$t(inner) = value {
                        Ok(inner)
                    } else {
                        Err(())
                    }
                }
            }
        };
    }

    // Operator without operands.
    macro_rules! op0 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 0, |_| Some(Self));
        }
    }

    // Operator with one operand. As in all fixed-arity variants, the operands are taken
    // from the end of the stack, so surplus leading operands are ignored.
    macro_rules! op1 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 1, |stack: &Stack<'a>| {
                let base = stack.len().saturating_sub(1);
                Some(Self(stack.get(base)?))
            });
        }
    }

    // Operator that takes every operand on the stack.
    macro_rules! op_all {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, u8::MAX as usize, |stack: &Stack<'a>|
                Some(Self(stack.get_all()?)));
        }
    }

    // Operator with two operands.
    macro_rules! op2 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 2, |stack: &Stack<'a>| {
                let base = stack.len().saturating_sub(2);
                Some(Self(stack.get(base)?, stack.get(base + 1)?))
            });
        }
    }

    // Operator with three operands.
    macro_rules! op3 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 3, |stack: &Stack<'a>| {
                let base = stack.len().saturating_sub(3);
                Some(Self(stack.get(base)?, stack.get(base + 1)?,
                    stack.get(base + 2)?))
            });
        }
    }

    // Operator with four operands.
    macro_rules! op4 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 4, |stack: &Stack<'a>| {
                let base = stack.len().saturating_sub(4);
                Some(Self(stack.get(base)?, stack.get(base + 1)?,
                    stack.get(base + 2)?, stack.get(base + 3)?))
            });
        }
    }

    // Operator with six operands.
    macro_rules! op6 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 6, |stack: &Stack<'a>| {
                let base = stack.len().saturating_sub(6);
                Some(Self(stack.get(base)?, stack.get(base + 1)?,
                    stack.get(base + 2)?, stack.get(base + 3)?,
                    stack.get(base + 4)?, stack.get(base + 5)?))
            });
        }
    }

    pub(crate) use op_all;
    pub(crate) use op_impl;
    pub(crate) use op0;
    pub(crate) use op1;
    pub(crate) use op2;
    pub(crate) use op3;
    pub(crate) use op4;
    pub(crate) use op6;
}