1#[allow(missing_docs)]
34pub mod ops;
35
36use crate::content::ops::TypedInstruction;
37use crate::object;
38use crate::object::dict::InlineImageDict;
39use crate::object::name::{Name, skip_name_like};
40use crate::object::{Array, Null, Number, Object, Stream};
41use crate::reader::Reader;
42use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
43use crate::trivia::is_white_space_character;
44use crate::util::find_needle;
45use core::array;
46use core::fmt::{Debug, Formatter};
47use core::ops::Deref;
48use smallvec::SmallVec;
49
/// Maximum number of operands a single instruction can carry.
///
/// This is both the fixed capacity of the operand [`Stack`] and the inline capacity of the
/// vectors returned by `Stack::get_all`.
const OPERANDS_THRESHOLD: usize = 10;
54
55impl Debug for Operator<'_> {
56 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
57 write!(f, "{}", self.0.as_str())
58 }
59}
60
/// The operator of a content stream instruction.
///
/// Wraps the name-like token that was read from the stream; dereferences to the raw bytes of
/// that token.
#[derive(Clone, PartialEq)]
pub struct Operator<'a>(Name<'a>);
64
65impl Deref for Operator<'_> {
66 type Target = [u8];
67
68 fn deref(&self) -> &Self::Target {
69 self.0.as_ref()
70 }
71}
72
73impl Skippable for Operator<'_> {
74 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
75 skip_name_like(r, false).map(|_| ())
76 }
77}
78
79impl<'a> Readable<'a> for Operator<'a> {
80 fn read(r: &mut Reader<'a>, _: &ReaderContext<'a>) -> Option<Self> {
81 let start = r.offset();
82 skip_name_like(r, false)?;
83 let end = r.offset();
84 let data = r.range(start..end)?;
85
86 if data.is_empty() {
87 return None;
88 }
89
90 Some(Self(Name::new(data)?))
91 }
92}
93
/// An iterator over the untyped instructions (operands plus operator) of a content stream.
#[derive(Clone)]
pub struct UntypedIter<'a> {
    /// Reader over the raw content stream bytes.
    reader: Reader<'a>,
    /// Operands collected for the instruction currently being read; reused between calls.
    stack: Stack<'a>,
    /// Operator of the most recently returned instruction, kept here so that the returned
    /// `Instruction` can borrow it.
    operator: Option<Operator<'a>>,
}
101
impl<'a> UntypedIter<'a> {
    /// Creates a new iterator over the instructions in `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self {
            reader: Reader::new(data),
            stack: Stack::new(),
            operator: None,
        }
    }

    /// Creates an iterator over an empty byte slice, i.e. one that yields no instructions.
    pub fn empty() -> Self {
        Self {
            reader: Reader::new(&[]),
            stack: Stack::new(),
            operator: None,
        }
    }

    /// Reads the next instruction: zero or more operands followed by an operator.
    ///
    /// The returned [`Instruction`] borrows the operand stack and operator stored inside the
    /// iterator, so it is only valid until the next call.
    ///
    /// Returns `None` when the end of the data is reached, and also when the data cannot be
    /// parsed (unreadable token, more than `OPERANDS_THRESHOLD` operands, malformed inline
    /// image).
    // Not an `Iterator` impl: the item borrows from `self`, which `Iterator::next` cannot express.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Option<Instruction<'_, 'a>> {
        // Discard whatever the previous instruction left behind.
        self.stack.clear();
        self.operator = None;

        self.reader.skip_white_spaces_and_comments();

        while !self.reader.at_end() {
            // These leading bytes select the "operand" path; everything else is treated as
            // the start of an operator.
            if matches!(
                self.reader.peek_byte()?,
                b'/' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'[' | b'<' | b'('
            ) {
                if let Some(object) = self.reader.read_without_context::<Object<'_>>() {
                    // Fails (and ends iteration) once the operand stack is full.
                    self.stack.push(object)?;
                } else if self.reader.read_without_context::<Operator<'_>>().is_some() {
                    // Not a valid object, but readable as a name-like token: drop it together
                    // with the operands gathered so far and keep going.
                    self.stack.clear();
                } else {
                    return None;
                }
            } else {
                let operator = match self.reader.read_without_context::<Operator<'_>>() {
                    Some(o) => o,
                    None => {
                        warn!("failed to read operator in content stream");

                        // Make sure subsequent calls also yield `None`.
                        self.reader.jump_to_end();
                        return None;
                    }
                };

                // `BI` starts an inline image: a dictionary followed by raw image bytes that
                // are terminated by `EI`. The image bytes may themselves contain the byte
                // sequence `EI`, so each candidate terminator is validated below.
                if operator.as_ref() == b"BI" {
                    let inline_dict = self.reader.read_without_context::<InlineImageDict<'_>>()?;
                    let dict = inline_dict.get_dict().clone();

                    // NOTE(review): presumably consumes the white space separating the
                    // dictionary part from the image data - confirm against `Reader`.
                    self.reader.read_white_space()?;

                    // Everything from here on; the image data is a prefix of this slice.
                    let stream_data = self.reader.tail()?;
                    let start_offset = self.reader.offset();

                    // Each iteration examines the next `EI` candidate in the remaining data.
                    'outer: while let Some(pos) = find_needle(self.reader.tail()?, b"EI") {
                        self.reader.read_bytes(pos)?;

                        // The reader is expected to sit exactly on the candidate `EI` now.
                        if self.reader.peek_bytes(2) == Some(b"EI") {
                            // An `EI` directly followed by a non-white-space byte is not
                            // accepted as a terminator; skip past it and search again. An
                            // `EI` at the very end of the data passes this check.
                            if self
                                .reader
                                .peek_bytes(3)
                                .is_some_and(|b| !is_white_space_character(b[2]))
                            {
                                self.reader.read_bytes(3)?;

                                continue;
                            }

                            // Image data = everything between the start and this candidate.
                            let end_offset = self.reader.offset() - start_offset;
                            let image_data = &stream_data[..end_offset];

                            let stream = Stream::new(image_data, dict.clone());

                            // Look ahead at what follows the candidate `EI` (the `[2..]`
                            // skips the candidate itself) to decide whether it really
                            // terminates the image. The main reader is not advanced here.
                            let tail = &self.reader.tail()?[2..];
                            let mut find_reader = Reader::new(tail);

                            while !find_reader.at_end() {
                                let remaining = find_reader.tail()?;
                                let next_ei = find_needle(remaining, b"EI");
                                let next_bi = find_needle(remaining, b"BI");

                                // Pick whichever of the two markers comes first; if neither
                                // occurs anymore, the candidate is accepted.
                                let (next_pos, is_ei) = match (next_ei, next_bi) {
                                    (Some(ei), Some(bi)) if ei <= bi => (ei, true),
                                    (Some(_), Some(bi)) => (bi, false),
                                    (Some(ei), None) => (ei, true),
                                    (None, Some(bi)) => (bi, false),
                                    (None, None) => break,
                                };

                                find_reader.read_bytes(next_pos)?;

                                if is_ei {
                                    // All bytes between the candidate and this later `EI`.
                                    let analyze_data = &tail[..find_reader.offset()];

                                    // Non-ASCII bytes before a later `EI` are taken as a sign
                                    // that we are still inside image data: reject the
                                    // candidate, step over it and try the next one.
                                    if analyze_data.iter().any(|c| !c.is_ascii()) {
                                        self.reader.read_bytes(2)?;
                                        continue 'outer;
                                    }

                                    // Otherwise try to interpret what follows the candidate
                                    // as a regular content stream.
                                    let mut iter = TypedIter::new(tail);
                                    let mut found = false;
                                    let mut counter = 0;

                                    while let Some(op) = iter.next() {
                                        // 20 instructions were read without the iterator
                                        // giving up: treat the data as a content stream.
                                        if counter >= 20 {
                                            found = true;
                                            break;
                                        }

                                        // A text-showing operator also counts as evidence
                                        // that this is a content stream (the later `EI` may
                                        // then simply be part of a string).
                                        if matches!(
                                            op,
                                            TypedInstruction::NextLineAndShowText(_)
                                                | TypedInstruction::ShowText(_)
                                                | TypedInstruction::ShowTexts(_)
                                                | TypedInstruction::ShowTextWithParameters(_)
                                        ) {
                                            found = true;
                                            break;
                                        }

                                        counter += 1;
                                    }

                                    // No such evidence: assume the candidate was still part
                                    // of the image data and move on to the next `EI`.
                                    if !found {
                                        self.reader.read_bytes(2)?;
                                        continue 'outer;
                                    }
                                } else {
                                    // A `BI` comes before any further `EI`. If it is followed
                                    // by a readable inline image dictionary, a new inline
                                    // image starts there and the candidate is accepted.
                                    let mut cloned = find_reader.clone();
                                    cloned.read_bytes(2)?;
                                    if cloned
                                        .read_without_context::<InlineImageDict<'_>>()
                                        .is_some()
                                    {
                                        break;
                                    }
                                }

                                // Step one byte forward so the marker just inspected is not
                                // found again, then keep scanning.
                                find_reader.read_byte()?;
                            }

                            // Candidate accepted: the image becomes the operand of `BI`.
                            self.stack.push(Object::Stream(stream))?;

                            // Consume the terminating `EI` itself.
                            self.reader.read_bytes(2)?;
                            self.reader.skip_white_spaces();

                            break;
                        }
                    }
                }

                // Store the operator in `self` so the returned instruction can borrow it.
                self.operator = Some(operator);
                return Some(Instruction {
                    operands: &self.stack,
                    operator: self.operator.as_ref().unwrap(),
                });
            }

            self.reader.skip_white_spaces_and_comments();
        }

        // Data exhausted without encountering another operator.
        None
    }
}
310
/// An iterator over the instructions of a content stream, converted into
/// `TypedInstruction`s.
#[derive(Clone)]
pub struct TypedIter<'a> {
    /// The underlying untyped iterator that does the actual parsing.
    untyped: UntypedIter<'a>,
}
317
318impl<'a> TypedIter<'a> {
319 pub fn new(data: &'a [u8]) -> Self {
321 Self {
322 untyped: UntypedIter::new(data),
323 }
324 }
325
326 pub(crate) fn from_untyped(untyped: UntypedIter<'a>) -> Self {
327 Self { untyped }
328 }
329
330 #[allow(clippy::should_implement_trait)]
332 pub fn next(&mut self) -> Option<TypedInstruction<'_, 'a>> {
333 let op = self.untyped.next()?;
334 match TypedInstruction::dispatch(&op) {
336 Some(op) => Some(op),
337 None => {
342 if [
343 &b"m"[..],
344 &b"l"[..],
345 &b"c"[..],
346 &b"v"[..],
347 &b"y"[..],
348 &b"h"[..],
349 &b"re"[..],
350 ]
351 .contains(&op.operator.0.deref())
352 {
353 None
354 } else {
355 Some(TypedInstruction::Fallback(op.operator))
356 }
357 }
358 }
359 }
360}
361
/// A single untyped instruction of a content stream: an operator and its operands.
///
/// Both parts are borrowed (lifetime `'b`) from the iterator that produced the instruction.
pub struct Instruction<'b, 'a> {
    /// The operands that preceded the operator.
    pub operands: &'b Stack<'a>,
    /// The operator of the instruction.
    pub operator: &'b Operator<'a>,
}
369
370impl<'b, 'a> Instruction<'b, 'a> {
371 pub fn operands(&self) -> OperandIterator<'b, 'a> {
373 OperandIterator::new(self.operands)
374 }
375}
376
/// A fixed-capacity stack holding the operands of one instruction.
pub struct Stack<'a> {
    /// Backing storage; only the first `len` entries are live, the rest are stale or
    /// placeholder values.
    data: [Object<'a>; OPERANDS_THRESHOLD],
    /// Number of live operands in `data`.
    len: usize,
}
384
385impl<'a> Default for Stack<'a> {
386 fn default() -> Self {
387 Self::new()
388 }
389}
390
391impl<'a> Stack<'a> {
392 pub fn new() -> Self {
394 Self {
395 data: array::from_fn(|_| Object::Null(Null)),
396 len: 0,
397 }
398 }
399
400 fn push(&mut self, operand: Object<'a>) -> Option<()> {
401 if self.len >= OPERANDS_THRESHOLD {
402 return None;
403 }
404
405 self.data[self.len] = operand;
406 self.len += 1;
407 Some(())
408 }
409
410 fn clear(&mut self) {
411 self.len = 0;
412 }
413
414 fn len(&self) -> usize {
415 self.len
416 }
417
418 fn as_slice(&self) -> &[Object<'a>] {
419 &self.data[..self.len]
420 }
421
422 fn get<'b, T>(&'b self, index: usize) -> Option<T>
423 where
424 T: Operand<'b, 'a>,
425 {
426 self.as_slice().get(index).and_then(T::from_object)
427 }
428
429 fn get_all<'b, T>(&'b self) -> Option<SmallVec<[T; OPERANDS_THRESHOLD]>>
430 where
431 T: Operand<'b, 'a>,
432 {
433 let mut operands = SmallVec::new();
434
435 for op in self.as_slice() {
436 let converted = T::from_object(op)?;
437 operands.push(converted);
438 }
439
440 Some(operands)
441 }
442}
443
444impl Debug for Stack<'_> {
445 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
446 f.debug_list().entries(self.as_slice()).finish()
447 }
448}
449
450impl Clone for Stack<'_> {
451 fn clone(&self) -> Self {
452 let mut stack = Self::new();
453 for item in self.as_slice() {
454 stack.push(item.clone()).unwrap();
455 }
456 stack
457 }
458}
459
460impl PartialEq for Stack<'_> {
461 fn eq(&self, other: &Self) -> bool {
462 self.as_slice() == other.as_slice()
463 }
464}
465
/// A type that an operand on the [`Stack`] can be converted into.
///
/// `'b` is the lifetime of the borrow of the stack entry, `'a` the lifetime of the data the
/// object itself refers to.
trait Operand<'b, 'a>: Sized {
    /// Attempts the conversion; `None` if `object` is not of the expected kind.
    fn from_object(object: &'b Object<'a>) -> Option<Self>;
}
469
470impl<'b, 'a> Operand<'b, 'a> for Number {
471 fn from_object(object: &'b Object<'a>) -> Option<Self> {
472 match object {
473 Object::Number(n) => Some(*n),
474 _ => None,
475 }
476 }
477}
478
479impl<'b, 'a> Operand<'b, 'a> for &'b object::String<'a> {
480 fn from_object(object: &'b Object<'a>) -> Option<Self> {
481 match object {
482 Object::String(s) => Some(s),
483 _ => None,
484 }
485 }
486}
487
488impl<'b, 'a> Operand<'b, 'a> for &'b Name<'a> {
489 fn from_object(object: &'b Object<'a>) -> Option<Self> {
490 match object {
491 Object::Name(n) => Some(n),
492 _ => None,
493 }
494 }
495}
496
497impl<'b, 'a> Operand<'b, 'a> for &'b Array<'a> {
498 fn from_object(object: &'b Object<'a>) -> Option<Self> {
499 match object {
500 Object::Array(a) => Some(a),
501 _ => None,
502 }
503 }
504}
505
506impl<'b, 'a> Operand<'b, 'a> for &'b Stream<'a> {
507 fn from_object(object: &'b Object<'a>) -> Option<Self> {
508 match object {
509 Object::Stream(s) => Some(s),
510 _ => None,
511 }
512 }
513}
514
impl<'b, 'a> Operand<'b, 'a> for &'b Object<'a> {
    /// Accepts any object unchanged; this conversion never fails.
    fn from_object(object: &'b Object<'a>) -> Option<Self> {
        Some(object)
    }
}
520
/// An iterator over the operands of an instruction.
pub struct OperandIterator<'b, 'a> {
    /// The operand stack being iterated.
    stack: &'b Stack<'a>,
    /// Index of the operand that the next call to `next` will return.
    cur_index: usize,
}
526
527impl<'b, 'a> OperandIterator<'b, 'a> {
528 fn new(stack: &'b Stack<'a>) -> Self {
529 Self {
530 stack,
531 cur_index: 0,
532 }
533 }
534}
535
536impl<'b, 'a> Iterator for OperandIterator<'b, 'a> {
537 type Item = &'b Object<'a>;
538
539 fn next(&mut self) -> Option<Self::Item> {
540 if let Some(item) = self.stack.as_slice().get(self.cur_index) {
541 self.cur_index += 1;
542
543 Some(item)
544 } else {
545 None
546 }
547 }
548}
549
/// Implemented by every typed operator; the implementations are generated by the macros in
/// the `macros` module of this file.
pub(crate) trait OperatorTrait<'b, 'a>: Sized {
    /// The operator string associated with this type.
    const OPERATOR: &'static str;

    /// Builds the typed operator from the operands on `stack`, or returns `None` if the
    /// operands cannot be converted.
    fn from_stack(stack: &'b Stack<'a>) -> Option<Self>;
}
555
/// Macros that generate the boilerplate for typed operators.
///
/// `op_impl!` is the workhorse; `op0!` .. `op6!` and `op_all!` are shorthands for operators
/// whose type is a tuple struct with a fixed number of operands (or a single field holding
/// all operands).
mod macros {
    // Generates, for the operator type `$t` (optionally with lifetimes):
    //
    // * an `OperatorTrait` impl with `OPERATOR = $e` whose `from_stack` evaluates `$body`
    //   with the stack bound to `$stack`,
    // * `From<$t> for TypedInstruction` (wrapping into the variant named `$t`),
    // * `TryFrom<TypedInstruction> for $t` (unwrapping that variant, `Err(())` otherwise).
    //
    // `$n` is the nominal operand count; it is accepted for documentation purposes but not
    // used by the expansion.
    macro_rules! op_impl {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr, $n:expr, |$stack:ident : $stack_ty:ty| $body:block) => {
            impl<'b, 'a> OperatorTrait<'b, 'a> for $t$(<$($l),+>)? {
                const OPERATOR: &'static str = $e;

                #[inline(always)]
                // The closure is deliberate, see below.
                #[allow(clippy::redundant_closure_call)]
                fn from_stack($stack: $stack_ty) -> Option<Self> {
                    // Run the body inside a closure so that a `?` in it only leaves the
                    // closure. If the body were pasted in directly, `?` would return from
                    // `from_stack` itself and the warning below could never be emitted.
                    let converted: Option<Self> = (move || $body)();

                    converted.or_else(|| {
                        warn!("failed to convert operands for operator {}", Self::OPERATOR);

                        None
                    })
                }
            }

            impl<'b, 'a> From<$t$(<$($l),+>)?> for TypedInstruction<'b, 'a> {
                fn from(value: $t$(<$($l),+>)?) -> Self {
                    TypedInstruction::$t(value)
                }
            }

            impl<'b, 'a> TryFrom<TypedInstruction<'b, 'a>> for $t$(<$($l),+>)? {
                type Error = ();

                fn try_from(value: TypedInstruction<'b, 'a>) -> core::result::Result<Self, Self::Error> {
                    match value {
                        TypedInstruction::$t(e) => Ok(e),
                        _ => Err(())
                    }
                }
            }
        };
    }

    // Operator without operands (unit struct); conversion always succeeds and the stack is
    // ignored.
    macro_rules! op0 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 0, |_stack: &'b Stack<'a>| {
                Some(Self)
            });
        }
    }

    // Operator with one operand. As in all fixed-arity variants below, the operands are
    // taken from the *top* of the stack: `shift` skips any surplus operands that precede
    // them. With too few operands `shift` is 0 and one of the lookups fails.
    macro_rules! op1 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 1, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(1);
                Some(Self(stack.get(shift)?))
            });
        }
    }

    // Operator whose single field collects all operands on the stack; fails if any operand
    // cannot be converted to the element type.
    macro_rules! op_all {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, u8::MAX as usize, |stack: &'b Stack<'a>| {
                Some(Self(stack.get_all()?))
            });
        }
    }

    // Operator with two operands (the last two on the stack).
    macro_rules! op2 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 2, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(2);
                Some(Self(stack.get(shift)?, stack.get(shift + 1)?))
            });
        }
    }

    // Operator with three operands (the last three on the stack).
    macro_rules! op3 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 3, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(3);
                Some(Self(stack.get(shift)?, stack.get(shift + 1)?,
                stack.get(shift + 2)?))
            });
        }
    }

    // Operator with four operands (the last four on the stack).
    macro_rules! op4 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 4, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(4);
                Some(Self(stack.get(shift)?, stack.get(shift + 1)?,
                stack.get(shift + 2)?, stack.get(shift + 3)?))
            });
        }
    }

    // Operator with six operands (the last six on the stack).
    macro_rules! op6 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 6, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(6);
                Some(Self(stack.get(shift)?, stack.get(shift + 1)?,
                stack.get(shift + 2)?, stack.get(shift + 3)?,
                stack.get(shift + 4)?, stack.get(shift + 5)?))
            });
        }
    }

    pub(crate) use op_all;
    pub(crate) use op_impl;
    pub(crate) use op0;
    pub(crate) use op1;
    pub(crate) use op2;
    pub(crate) use op3;
    pub(crate) use op4;
    pub(crate) use op6;
}