1#[allow(missing_docs)]
34pub mod ops;
35
36use crate::content::ops::TypedInstruction;
37use crate::object::Stream;
38use crate::object::dict::InlineImageDict;
39use crate::object::dict::keys::{
40 ASCII85_DECODE_ABBREVIATION, BITS_PER_COMPONENT, BPC, COLORSPACE, CS, F, FILTER, H, HEIGHT, IM,
41 IMAGE_MASK, W, WIDTH,
42};
43use crate::object::name::{Name, skip_name_like};
44use crate::object::{Array, Number, Object, ObjectLike};
45use crate::reader::Reader;
46use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
47use core::fmt::{Debug, Formatter};
48use core::ops::Deref;
49use log::warn;
50use smallvec::SmallVec;
51
/// Inline capacity of the `SmallVec` buffers that hold the operands of a single
/// instruction (used by `Stack` and by `Stack::get_all`).
const OPERANDS_THRESHOLD: usize = 6;
56
57fn compute_raw_inline_image_size(dict: &crate::object::dict::Dict<'_>) -> Option<usize> {
63 let has_filter = dict.get::<Name>(F).is_some()
65 || dict.get::<Name>(FILTER).is_some()
66 || dict.get::<Array>(F).is_some()
67 || dict.get::<Array>(FILTER).is_some();
68 if has_filter {
69 return None;
70 }
71
72 let is_image_mask =
74 dict.get::<bool>(IM).unwrap_or(false) || dict.get::<bool>(IMAGE_MASK).unwrap_or(false);
75
76 let w = dict
77 .get::<Number>(W)
78 .or_else(|| dict.get::<Number>(WIDTH))?
79 .as_f64() as usize;
80 let h = dict
81 .get::<Number>(H)
82 .or_else(|| dict.get::<Number>(HEIGHT))?
83 .as_f64() as usize;
84
85 let (bpc, components): (usize, usize) = if is_image_mask {
86 (1, 1)
87 } else {
88 let bpc = dict
89 .get::<Number>(BPC)
90 .or_else(|| dict.get::<Number>(BITS_PER_COMPONENT))
91 .map(|n| n.as_f64() as usize)
92 .unwrap_or(8);
93 let cs_name: Option<Vec<u8>> = dict
94 .get::<Name>(CS)
95 .map(|n| n.as_ref().to_vec())
96 .or_else(|| dict.get::<Name>(COLORSPACE).map(|n| n.as_ref().to_vec()));
97 let components = match cs_name.as_deref() {
98 Some(b"G") | Some(b"DeviceGray") | Some(b"I") | Some(b"Indexed") => 1,
99 Some(b"RGB") | Some(b"DeviceRGB") => 3,
100 Some(b"CMYK") | Some(b"DeviceCMYK") => 4,
101 _ => return None, };
103 (bpc, components)
104 };
105
106 let bits_per_row = w * bpc * components;
108 let stride = bits_per_row.div_ceil(8);
109 Some(h * stride)
110}
111
112fn find_a85_inline_image_end(
122 stream_data: &[u8],
123 dict: &crate::object::dict::Dict<'_>,
124) -> Option<(usize, usize)> {
125 let outermost: Option<Vec<u8>> = dict
127 .get::<Name>(F)
128 .map(|n| n.as_ref().to_vec())
129 .or_else(|| dict.get::<Name>(FILTER).map(|n| n.as_ref().to_vec()))
130 .or_else(|| {
131 dict.get::<Array>(F)
132 .and_then(|a| a.iter::<Name>().next())
133 .map(|n| n.as_ref().to_vec())
134 })
135 .or_else(|| {
136 dict.get::<Array>(FILTER)
137 .and_then(|a| a.iter::<Name>().next())
138 .map(|n| n.as_ref().to_vec())
139 });
140
141 let is_a85 = matches!(
142 outermost.as_deref(),
143 Some(ASCII85_DECODE_ABBREVIATION) | Some(b"ASCII85Decode")
144 );
145 if !is_a85 {
146 return None;
147 }
148
149 let mut i = 0;
151 while i + 2 <= stream_data.len() {
152 if stream_data[i] == b'~' && stream_data[i + 1] == b'>' {
153 let eos_end = i + 2;
154 let mut ei_start = eos_end;
155 while ei_start < stream_data.len()
156 && matches!(stream_data[ei_start], b' ' | b'\t' | b'\n' | b'\r' | 0x0C)
157 {
158 ei_start += 1;
159 }
160 if stream_data.get(ei_start..ei_start + 2) == Some(b"EI") {
161 let after_ei = ei_start + 2;
162 let ei_delimited = after_ei >= stream_data.len()
163 || matches!(stream_data[after_ei], b' ' | b'\t' | b'\n' | b'\r' | 0x0C);
164 if ei_delimited {
165 return Some((eos_end, after_ei));
166 }
167 }
168 }
170 i += 1;
171 }
172 None
173}
174
175impl Debug for Operator {
176 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
177 write!(f, "{}", self.0.as_str())
178 }
179}
180
181#[derive(Clone, PartialEq)]
183pub struct Operator(Name);
184
185impl Deref for Operator {
186 type Target = [u8];
187
188 fn deref(&self) -> &Self::Target {
189 self.0.as_ref()
190 }
191}
192
193impl Skippable for Operator {
194 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
195 skip_name_like(r, false).map(|_| ())
196 }
197}
198
199impl Readable<'_> for Operator {
200 fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
201 let start = r.offset();
202 skip_name_like(r, false)?;
203 let end = r.offset();
204 let data = r.range(start..end)?;
205
206 if data.is_empty() {
207 return None;
208 }
209
210 Some(Self(Name::new(data)))
211 }
212}
213
214#[derive(Clone)]
216pub struct UntypedIter<'a> {
217 reader: Reader<'a>,
218 stack: Stack<'a>,
219}
220
221impl<'a> UntypedIter<'a> {
222 pub fn new(data: &'a [u8]) -> Self {
224 Self {
225 reader: Reader::new(data),
226 stack: Stack::new(),
227 }
228 }
229
230 pub fn empty() -> Self {
232 Self {
233 reader: Reader::new(&[]),
234 stack: Stack::new(),
235 }
236 }
237}
238
/// Parses the content stream one instruction at a time: operands are pushed onto the
/// internal stack until an operator is read, at which point the collected operands and
/// the operator are returned together.
impl<'a> Iterator for UntypedIter<'a> {
    type Item = Instruction<'a>;

    /// Returns the next instruction, or `None` when the input is exhausted or cannot
    /// be parsed any further.
    ///
    /// The `BI` (begin inline image) operator gets special treatment: the inline image
    /// dictionary and the image bytes that follow it are read here and pushed as a
    /// single `Object::Stream` operand of the returned `BI` instruction.
    fn next(&mut self) -> Option<Self::Item> {
        // Operands left over from the previous call never carry over.
        self.stack.clear();

        self.reader.skip_white_spaces_and_comments();

        while !self.reader.at_end() {
            // These leading bytes are treated as the start of an operand; anything
            // else is parsed as an operator.
            if matches!(
                self.reader.peek_byte()?,
                b'/' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'[' | b'<' | b'('
            ) {
                if let Some(object) = self.reader.read_without_context::<Object<'_>>() {
                    self.stack.push(object);
                } else if self.reader.read_without_context::<Operator>().is_some() {
                    // The token is not a valid object but can be consumed as an
                    // operator-like token: drop the operands collected so far and
                    // keep going.
                    self.stack.clear();
                } else {
                    // Neither an object nor an operator: give up.
                    return None;
                }
            } else {
                let operator = match self.reader.read_without_context::<Operator>() {
                    Some(o) => o,
                    None => {
                        warn!("failed to read operator in content stream");

                        // Move to the end so that subsequent calls also yield `None`.
                        self.reader.jump_to_end();
                        return None;
                    }
                };

                if operator.as_ref() == b"BI" {
                    let inline_dict = self.reader.read_without_context::<InlineImageDict<'_>>()?;
                    let dict = inline_dict.get_dict().clone();

                    // White space is required between the dictionary and the image
                    // data; fail otherwise.
                    self.reader.read_white_space()?;

                    // Everything from here on; the image data is a prefix of this.
                    let stream_data = self.reader.tail()?;
                    let start_offset = self.reader.offset();

                    // Strategy 1: an ASCII85-encoded image ends at its `~>` marker
                    // followed by `EI`. `image_end` is just past `~>`, `advance` is
                    // just past `EI`.
                    if let Some((image_end, advance)) =
                        find_a85_inline_image_end(stream_data, &dict)
                    {
                        let image_data = &stream_data[..image_end];
                        let stream = Stream::new(image_data, dict.clone());
                        self.stack.push(Object::Stream(stream));
                        self.reader.read_bytes(advance)?;
                        self.reader.skip_white_spaces();

                        return Some(Instruction {
                            operands: core::mem::take(&mut self.stack),
                            operator,
                        });
                    }

                    // Strategy 2: for an unfiltered image the byte count follows from
                    // the dictionary, so take exactly that many bytes (if available).
                    if let Some(raw_size) = compute_raw_inline_image_size(&dict)
                        && stream_data.len() >= raw_size
                    {
                        let image_data = &stream_data[..raw_size];
                        let stream = Stream::new(image_data, dict.clone());
                        self.stack.push(Object::Stream(stream));
                        self.reader.read_bytes(raw_size)?;
                        self.reader.skip_white_spaces();
                        // Consume the two bytes expected to be the `EI` keyword. The
                        // result is deliberately ignored, so this also succeeds when
                        // fewer than two bytes remain.
                        // NOTE(review): the bytes are not checked to actually be `EI`
                        // — confirm this best-effort behavior is intended.
                        let _ = self.reader.read_bytes(2);
                        self.reader.skip_white_spaces();

                        return Some(Instruction {
                            operands: core::mem::take(&mut self.stack),
                            operator,
                        });
                    }

                    // Strategy 3: scan forward byte by byte for an `EI` and use
                    // heuristics to decide whether it really terminates the image or
                    // is just part of the image data. If no `EI` is accepted before
                    // the data runs out, the `BI` instruction is returned without a
                    // stream operand.
                    'outer: while let Some(bytes) = self.reader.peek_bytes(2) {
                        if bytes == b"EI" {
                            // Candidate image data: everything between the start of
                            // the image and this `EI`.
                            let end_offset = self.reader.offset() - start_offset;
                            let image_data = &stream_data[..end_offset];

                            let stream = Stream::new(image_data, dict.clone());

                            // Look at what follows the candidate `EI` (skipping the
                            // two bytes of the keyword itself).
                            let tail = &self.reader.tail()?[2..];
                            let mut find_reader = Reader::new(tail);

                            while let Some(bytes) = find_reader.peek_bytes(2) {
                                if bytes == b"EI" {
                                    // There is another `EI` further on, so the
                                    // candidate may have been part of the image data.
                                    // NOTE(review): this inspects the entire remaining
                                    // tail, not only the bytes up to the later `EI`
                                    // — confirm this is intended.
                                    let analyze_data = &tail;

                                    // Non-ASCII bytes after the candidate: reject it
                                    // and resume scanning past it.
                                    if analyze_data.iter().any(|c| !c.is_ascii()) {
                                        self.reader.read_bytes(2)?;
                                        continue 'outer;
                                    }

                                    // Otherwise try to interpret the tail as a
                                    // content stream and look for evidence that it
                                    // really is one.
                                    let iter = TypedIter::new(tail);
                                    let mut found = false;

                                    for (counter, op) in iter.enumerate() {
                                        // 20 instructions parsed in a row are taken
                                        // as sufficient evidence.
                                        if counter >= 20 {
                                            found = true;
                                            break;
                                        }

                                        // A text-showing instruction is accepted as
                                        // evidence as well.
                                        if matches!(
                                            op,
                                            TypedInstruction::NextLineAndShowText(_)
                                                | TypedInstruction::ShowText(_)
                                                | TypedInstruction::ShowTexts(_)
                                                | TypedInstruction::ShowTextWithParameters(_)
                                        ) {
                                            found = true;
                                            break;
                                        }
                                    }

                                    if !found {
                                        // No evidence of a content stream: reject the
                                        // candidate and resume scanning past it.
                                        self.reader.read_bytes(2)?;
                                        continue 'outer;
                                    }
                                } else if bytes == b"BI" {
                                    // A later `BI` followed by a readable inline
                                    // image dictionary starts a new inline image, so
                                    // stop looking ahead and accept the candidate.
                                    let mut cloned = find_reader.clone();
                                    cloned.read_bytes(2)?;
                                    if cloned
                                        .read_without_context::<InlineImageDict<'_>>()
                                        .is_some()
                                    {
                                        break;
                                    }
                                }

                                find_reader.read_byte()?;
                            }

                            // The candidate `EI` is accepted: emit the stream and
                            // consume the keyword.
                            self.stack.push(Object::Stream(stream));

                            self.reader.read_bytes(2)?;
                            self.reader.skip_white_spaces();

                            break;
                        } else {
                            self.reader.read_byte()?;
                        }
                    }
                }

                // An operator always completes an instruction; hand over the operands
                // collected so far and leave an empty stack behind.
                return Some(Instruction {
                    operands: core::mem::take(&mut self.stack),
                    operator,
                });
            }

            self.reader.skip_white_spaces_and_comments();
        }

        // Input exhausted without reaching an operator.
        None
    }
}
441
442#[derive(Clone)]
445pub struct TypedIter<'a> {
446 untyped: UntypedIter<'a>,
447}
448
449impl<'a> TypedIter<'a> {
450 pub fn new(data: &'a [u8]) -> Self {
452 Self {
453 untyped: UntypedIter::new(data),
454 }
455 }
456
457 pub(crate) fn from_untyped(untyped: UntypedIter<'a>) -> Self {
458 Self { untyped }
459 }
460}
461
462impl<'a> Iterator for TypedIter<'a> {
463 type Item = TypedInstruction<'a>;
464
465 fn next(&mut self) -> Option<Self::Item> {
466 let op = &self.untyped.next()?;
467 match TypedInstruction::dispatch(op) {
468 Some(op) => Some(op),
469 None => {
474 if [
475 &b"m"[..],
476 &b"l"[..],
477 &b"c"[..],
478 &b"v"[..],
479 &b"y"[..],
480 &b"h"[..],
481 &b"re"[..],
482 ]
483 .contains(&op.operator.0.deref())
484 {
485 None
486 } else {
487 Some(TypedInstruction::Fallback(op.operator.clone()))
488 }
489 }
490 }
491 }
492}
493
494pub struct Instruction<'a> {
496 pub operands: Stack<'a>,
498 pub operator: Operator,
500}
501
502impl<'a> Instruction<'a> {
503 pub fn operands(self) -> OperandIterator<'a> {
505 OperandIterator::new(self.operands)
506 }
507}
508
509#[derive(Debug, Clone, PartialEq)]
511pub struct Stack<'a>(SmallVec<[Object<'a>; OPERANDS_THRESHOLD]>);
512
513impl<'a> Default for Stack<'a> {
514 fn default() -> Self {
515 Self::new()
516 }
517}
518
519impl<'a> Stack<'a> {
520 pub fn new() -> Self {
522 Self(SmallVec::new())
523 }
524
525 fn push(&mut self, operand: Object<'a>) {
526 self.0.push(operand);
527 }
528
529 fn clear(&mut self) {
530 self.0.clear();
531 }
532
533 fn len(&self) -> usize {
534 self.0.len()
535 }
536
537 fn get<T>(&self, index: usize) -> Option<T>
538 where
539 T: ObjectLike<'a>,
540 {
541 self.0.get(index).and_then(|e| e.clone().cast::<T>())
542 }
543
544 fn get_all<T>(&self) -> Option<SmallVec<[T; OPERANDS_THRESHOLD]>>
545 where
546 T: ObjectLike<'a>,
547 {
548 let mut operands = SmallVec::new();
549
550 for op in &self.0 {
551 let converted = op.clone().cast::<T>()?;
552 operands.push(converted);
553 }
554
555 Some(operands)
556 }
557}
558
559pub struct OperandIterator<'a> {
561 stack: Stack<'a>,
562 cur_index: usize,
563}
564
565impl<'a> OperandIterator<'a> {
566 fn new(stack: Stack<'a>) -> Self {
567 Self {
568 stack,
569 cur_index: 0,
570 }
571 }
572}
573
574impl<'a> Iterator for OperandIterator<'a> {
575 type Item = Object<'a>;
576
577 fn next(&mut self) -> Option<Self::Item> {
578 if let Some(item) = self.stack.get::<Object<'a>>(self.cur_index) {
579 self.cur_index += 1;
580
581 Some(item)
582 } else {
583 None
584 }
585 }
586}
587
/// Implemented by every typed operator: ties the type to its operator keyword and
/// describes how to build it from the operand stack. Implementors must also convert
/// into `TypedInstruction` and be recoverable from one via `TryFrom`.
pub(crate) trait OperatorTrait<'a>
where
    Self: Sized + Into<TypedInstruction<'a>> + TryFrom<TypedInstruction<'a>>,
{
    /// The operator keyword as it appears in the content stream.
    const OPERATOR: &'static str;

    /// Builds the typed operator from the operands on `stack`, or returns `None` if
    /// the operands cannot be converted.
    fn from_stack(stack: &Stack<'a>) -> Option<Self>;
}
596
/// Helper macros that generate the `OperatorTrait` implementation and the
/// `TypedInstruction` conversions for typed operator structs.
mod macros {
    /// Core generator used by all the `opN` macros.
    ///
    /// Arguments: the operator type (optionally with a lifetime), the operator keyword,
    /// the expected operand count, and a closure that builds the type from the stack.
    /// An expected count of `u8::MAX as usize` disables the operand-count check.
    macro_rules! op_impl {
        ($t:ident $(<$l:lifetime>),*, $e:expr, $n:expr, $body:expr) => {
            impl<'a> OperatorTrait<'a> for $t$(<$l>),* {
                const OPERATOR: &'static str = $e;

                fn from_stack(stack: &Stack<'a>) -> Option<Self> {
                    // `u8::MAX` is the sentinel for "any number of operands".
                    if $n != u8::MAX as usize {
                        // A wrong operand count is only logged; conversion is still
                        // attempted below.
                        if stack.len() != $n {
                            warn!("wrong stack length {} for operator {}, expected {}", stack.len(), Self::OPERATOR, $n);
                        }
                    }

                    $body(stack).or_else(|| {
                        warn!("failed to convert operands for operator {}", Self::OPERATOR);

                        None
                    })
                }
            }

            impl<'a> From<$t$(<$l>),*> for TypedInstruction<'a> {
                fn from(value: $t$(<$l>),*) -> Self {
                    TypedInstruction::$t(value)
                }
            }

            impl<'a> TryFrom<TypedInstruction<'a>> for $t$(<$l>),* {
                type Error = ();

                fn try_from(value: TypedInstruction<'a>) -> core::result::Result<Self, Self::Error> {
                    match value {
                        TypedInstruction::$t(e) => Ok(e),
                        _ => Err(())
                    }
                }
            }
        };
    }

    /// Operator without operands; the generated type is built as a unit value.
    macro_rules! op0 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 0, |_| Some(Self));
        }
    }

    /// Operator with one operand.
    macro_rules! op1 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 1, |stack: &Stack<'a>| {
                // If the stack holds more operands than expected, use the last one.
                let shift = stack.len().saturating_sub(1);
                Some(Self(stack.get(0 + shift)?))
            });
        }
    }

    /// Operator that accepts any number of operands and stores all of them.
    macro_rules! op_all {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, u8::MAX as usize, |stack: &Stack<'a>|
            Some(Self(stack.get_all()?)));
        }
    }

    /// Operator with two operands.
    macro_rules! op2 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 2, |stack: &Stack<'a>| {
                // If the stack holds more operands than expected, use the last two.
                let shift = stack.len().saturating_sub(2);
                Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?))
            });
        }
    }

    /// Operator with three operands.
    macro_rules! op3 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 3, |stack: &Stack<'a>| {
                // If the stack holds more operands than expected, use the last three.
                let shift = stack.len().saturating_sub(3);
                Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?))
            });
        }
    }

    /// Operator with four operands.
    macro_rules! op4 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 4, |stack: &Stack<'a>| {
                // If the stack holds more operands than expected, use the last four.
                let shift = stack.len().saturating_sub(4);
                Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?, stack.get(3 + shift)?))
            });
        }
    }

    /// Operator with six operands.
    macro_rules! op6 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 6, |stack: &Stack<'a>| {
                // If the stack holds more operands than expected, use the last six.
                let shift = stack.len().saturating_sub(6);
                Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?, stack.get(3 + shift)?,
                stack.get(4 + shift)?, stack.get(5 + shift)?))
            });
        }
    }

    // Re-export the macros so they can be reached by path from elsewhere in the crate.
    pub(crate) use op_all;
    pub(crate) use op_impl;
    pub(crate) use op0;
    pub(crate) use op1;
    pub(crate) use op2;
    pub(crate) use op3;
    pub(crate) use op4;
    pub(crate) use op6;
}