1#[allow(missing_docs)]
34pub mod ops;
35
36use crate::content::ops::TypedInstruction;
37use crate::object::Stream;
38use crate::object::dict::InlineImageDict;
39use crate::object::name::{Name, skip_name_like};
40use crate::object::{Object, ObjectLike};
41use crate::reader::Reader;
42use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
43use log::warn;
44use smallvec::SmallVec;
45use std::fmt::{Debug, Formatter};
46use std::ops::Deref;
47
/// Inline capacity of the operand [`Stack`] and of the vectors returned by
/// `Stack::get_all`: up to this many operands are kept in the `SmallVec`'s
/// inline storage.
const OPERANDS_THRESHOLD: usize = 6;
52
53impl Debug for Operator<'_> {
54 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
55 write!(f, "{}", self.0.as_str())
56 }
57}
58
/// An operator of a content stream instruction.
///
/// This is a thin wrapper around a [`Name`] holding the operator's bytes;
/// it dereferences to those bytes.
#[derive(Clone, PartialEq)]
pub struct Operator<'a>(Name<'a>);
62
63impl Deref for Operator<'_> {
64 type Target = [u8];
65
66 fn deref(&self) -> &Self::Target {
67 self.0.as_ref()
68 }
69}
70
71impl Skippable for Operator<'_> {
72 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
73 skip_name_like(r, false).map(|_| ())
74 }
75}
76
77impl<'a> Readable<'a> for Operator<'a> {
78 fn read(r: &mut Reader<'a>, _: &ReaderContext<'_>) -> Option<Self> {
79 let data = {
80 let start = r.offset();
81 skip_name_like(r, false)?;
82 let end = r.offset();
83 let data = r.range(start..end).unwrap();
84
85 if data.is_empty() {
86 return None;
87 }
88
89 data
90 };
91
92 Some(Operator(Name::from_unescaped(data)))
93 }
94}
95
/// An iterator over the untyped instructions of a content stream.
///
/// Each yielded [`Instruction`] consists of an operator and the operands
/// that were read before it.
#[derive(Clone)]
pub struct UntypedIter<'a> {
    // Reader over the raw content stream bytes.
    reader: Reader<'a>,
    // Operands collected for the instruction currently being read; cleared at
    // the start of every `next` call.
    stack: Stack<'a>,
}
102
103impl<'a> UntypedIter<'a> {
104 pub fn new(data: &'a [u8]) -> Self {
106 Self {
107 reader: Reader::new(data),
108 stack: Stack::new(),
109 }
110 }
111
112 pub fn empty() -> Self {
114 Self {
115 reader: Reader::new(&[]),
116 stack: Stack::new(),
117 }
118 }
119}
120
121impl<'a> Iterator for UntypedIter<'a> {
122 type Item = Instruction<'a>;
123
124 fn next(&mut self) -> Option<Self::Item> {
125 self.stack.clear();
126
127 self.reader.skip_white_spaces_and_comments();
128
129 while !self.reader.at_end() {
130 if matches!(
132 self.reader.peek_byte()?,
133 b'/' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'[' | b'<' | b'('
134 ) {
135 self.stack
136 .push(self.reader.read_without_context::<Object<'_>>()?);
137 } else {
138 let operator = match self.reader.read_without_context::<Operator<'_>>() {
139 Some(o) => o,
140 None => {
141 warn!("failed to read operator in content stream");
142
143 self.reader.jump_to_end();
144 return None;
145 }
146 };
147
148 if operator.as_ref() == b"BI" {
150 let inline_dict = self.reader.read_without_context::<InlineImageDict<'_>>()?;
152 let dict = inline_dict.get_dict().clone();
153
154 self.reader.read_white_space()?;
156
157 let stream_data = self.reader.tail()?;
158 let start_offset = self.reader.offset();
159
160 'outer: while let Some(bytes) = self.reader.peek_bytes(2) {
161 if bytes == b"EI" {
162 let end_offset = self.reader.offset() - start_offset;
163 let image_data = &stream_data[..end_offset];
164
165 let stream = Stream::new(image_data, dict.clone());
166
167 let tail = &self.reader.tail()?[2..];
174 let mut find_reader = Reader::new(tail);
175
176 while let Some(bytes) = find_reader.peek_bytes(2) {
177 if bytes == b"EI" {
178 let analyze_data = &tail;
179
180 if analyze_data.iter().any(|c| !c.is_ascii()) {
183 self.reader.read_bytes(2)?;
184 continue 'outer;
185 }
186
187 let iter = TypedIter::new(tail);
196 let mut found = false;
197
198 for (counter, op) in iter.enumerate() {
199 if counter >= 20 {
205 found = true;
206 break;
207 }
208
209 if matches!(
210 op,
211 TypedInstruction::NextLineAndShowText(_)
212 | TypedInstruction::ShowText(_)
213 | TypedInstruction::ShowTexts(_)
214 | TypedInstruction::ShowTextWithParameters(_)
215 ) {
216 found = true;
219 break;
220 }
221 }
222
223 if !found {
224 self.reader.read_bytes(2)?;
227 continue 'outer;
228 }
229 } else if bytes == b"BI" {
230 let mut cloned = find_reader.clone();
233 cloned.read_bytes(2)?;
234 if cloned
235 .read_without_context::<InlineImageDict<'_>>()
236 .is_some()
237 {
238 break;
239 }
240 }
241
242 find_reader.read_byte()?;
243 }
244
245 self.stack.push(Object::Stream(stream));
246
247 self.reader.read_bytes(2)?;
248 self.reader.skip_white_spaces();
249
250 break;
251 } else {
252 self.reader.read_byte()?;
253 }
254 }
255 }
256
257 return Some(Instruction {
258 operands: self.stack.clone(),
259 operator,
260 });
261 }
262
263 self.reader.skip_white_spaces_and_comments();
264 }
265
266 None
267 }
268}
269
/// An iterator over the typed instructions of a content stream.
///
/// It wraps an [`UntypedIter`] and converts each untyped instruction into a
/// [`TypedInstruction`].
#[derive(Clone)]
pub struct TypedIter<'a> {
    // The underlying iterator that yields the raw instructions.
    untyped: UntypedIter<'a>,
}
276
277impl<'a> TypedIter<'a> {
278 pub fn new(data: &'a [u8]) -> Self {
280 Self {
281 untyped: UntypedIter::new(data),
282 }
283 }
284
285 pub(crate) fn from_untyped(untyped: UntypedIter<'a>) -> Self {
286 Self { untyped }
287 }
288}
289
290impl<'a> Iterator for TypedIter<'a> {
291 type Item = TypedInstruction<'a>;
292
293 fn next(&mut self) -> Option<Self::Item> {
294 let op = &self.untyped.next()?;
295 match TypedInstruction::dispatch(op) {
296 Some(op) => Some(op),
297 None => {
302 if [
303 &b"m"[..],
304 &b"l"[..],
305 &b"c"[..],
306 &b"v"[..],
307 &b"y"[..],
308 &b"h"[..],
309 &b"re"[..],
310 ]
311 .contains(&op.operator.0.deref())
312 {
313 None
314 } else {
315 Some(TypedInstruction::Fallback(op.operator.clone()))
316 }
317 }
318 }
319 }
320}
321
/// An untyped instruction of a content stream: an operator together with the
/// operands that were read before it.
pub struct Instruction<'a> {
    /// The operands of the instruction, in the order they were read.
    pub operands: Stack<'a>,
    /// The operator of the instruction.
    pub operator: Operator<'a>,
}
329
330impl<'a> Instruction<'a> {
331 pub fn operands(self) -> OperandIterator<'a> {
333 OperandIterator::new(self.operands)
334 }
335}
336
/// A stack holding the operands of an instruction.
///
/// Backed by a `SmallVec` with an inline capacity of `OPERANDS_THRESHOLD`
/// objects.
#[derive(Debug, Clone, PartialEq)]
pub struct Stack<'a>(SmallVec<[Object<'a>; OPERANDS_THRESHOLD]>);
340
341impl<'a> Default for Stack<'a> {
342 fn default() -> Self {
343 Self::new()
344 }
345}
346
347impl<'a> Stack<'a> {
348 pub fn new() -> Self {
350 Self(SmallVec::new())
351 }
352
353 fn push(&mut self, operand: Object<'a>) {
354 self.0.push(operand);
355 }
356
357 fn clear(&mut self) {
358 self.0.clear();
359 }
360
361 fn len(&self) -> usize {
362 self.0.len()
363 }
364
365 fn get<T>(&self, index: usize) -> Option<T>
366 where
367 T: ObjectLike<'a>,
368 {
369 self.0.get(index).and_then(|e| e.clone().cast::<T>())
370 }
371
372 fn get_all<T>(&self) -> Option<SmallVec<[T; OPERANDS_THRESHOLD]>>
373 where
374 T: ObjectLike<'a>,
375 {
376 let mut operands = SmallVec::new();
377
378 for op in &self.0 {
379 let converted = op.clone().cast::<T>()?;
380 operands.push(converted);
381 }
382
383 Some(operands)
384 }
385}
386
/// An iterator over the operands of an instruction, in stack order.
pub struct OperandIterator<'a> {
    // The operands being iterated over.
    stack: Stack<'a>,
    // Index of the operand that will be returned by the next call to `next`.
    cur_index: usize,
}
392
393impl<'a> OperandIterator<'a> {
394 fn new(stack: Stack<'a>) -> Self {
395 Self {
396 stack,
397 cur_index: 0,
398 }
399 }
400}
401
402impl<'a> Iterator for OperandIterator<'a> {
403 type Item = Object<'a>;
404
405 fn next(&mut self) -> Option<Self::Item> {
406 if let Some(item) = self.stack.get::<Object<'a>>(self.cur_index) {
407 self.cur_index += 1;
408
409 Some(item)
410 } else {
411 None
412 }
413 }
414}
415
/// Implemented by every typed operator.
///
/// An implementor can be converted into a [`TypedInstruction`], extracted
/// back out of one, and built from the operands on a [`Stack`].
pub(crate) trait OperatorTrait<'a>
where
    Self: Sized + Into<TypedInstruction<'a>> + TryFrom<TypedInstruction<'a>>,
{
    /// The operator's name as a string.
    const OPERATOR: &'static str;

    /// Builds the operator from the operands on the stack, or returns `None`
    /// if that is not possible.
    fn from_stack(stack: &Stack<'a>) -> Option<Self>;
}
424
// Helper macros for implementing `OperatorTrait` and the conversions from/to
// `TypedInstruction` for operator types.
mod macros {
    // Core macro the others expand to. Arguments:
    // - `$t` (plus an optional lifetime): the operator type, which must also
    //   be the name of the matching `TypedInstruction` variant.
    // - `$e`: the operator string.
    // - `$n`: the expected number of operands, or `u8::MAX as usize` to
    //   disable the operand count check.
    // - `$body`: a closure that builds the operator from the stack.
    macro_rules! op_impl {
        ($t:ident $(<$l:lifetime>),*, $e:expr, $n:expr, $body:expr) => {
            impl<'a> OperatorTrait<'a> for $t$(<$l>),* {
                const OPERATOR: &'static str = $e;

                fn from_stack(stack: &Stack<'a>) -> Option<Self> {
                    // A wrong operand count only produces a warning; the
                    // conversion is still attempted below.
                    if $n != u8::MAX as usize {
                        if stack.len() != $n {
                            warn!("wrong stack length {} for operator {}, expected {}", stack.len(), Self::OPERATOR, $n);
                        }
                    }

                    $body(stack).or_else(|| {
                        warn!("failed to convert operands for operator {}", Self::OPERATOR);

                        None
                    })
                }
            }

            impl<'a> From<$t$(<$l>),*> for TypedInstruction<'a> {
                fn from(value: $t$(<$l>),*) -> Self {
                    TypedInstruction::$t(value)
                }
            }

            impl<'a> TryFrom<TypedInstruction<'a>> for $t$(<$l>),* {
                type Error = ();

                fn try_from(value: TypedInstruction<'a>) -> std::result::Result<Self, Self::Error> {
                    match value {
                        TypedInstruction::$t(e) => Ok(e),
                        _ => Err(())
                    }
                }
            }
        };
    }

    // In the fixed-arity macros below, `shift` is the number of surplus
    // operands at the bottom of the stack (0 when the count is as expected).
    // Adding it to every index means the last N operands on the stack are
    // the ones that are used, and any extra leading operands are ignored.

    // Operator without operands.
    macro_rules! op0 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 0, |_| Some(Self));
        }
    }

    // Operator with exactly one operand.
    macro_rules! op1 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 1, |stack: &Stack<'a>| {
                let shift = stack.len().saturating_sub(1);
                Some(Self(stack.get(0 + shift)?))
            });
        }
    }

    // Operator that takes all operands on the stack, however many there are;
    // the operand count check is disabled via `u8::MAX`.
    macro_rules! op_all {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, u8::MAX as usize, |stack: &Stack<'a>|
            Some(Self(stack.get_all()?)));
        }
    }

    // Operator with exactly two operands.
    macro_rules! op2 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 2, |stack: &Stack<'a>| {
                let shift = stack.len().saturating_sub(2);
                Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?))
            });
        }
    }

    // Operator with exactly three operands.
    macro_rules! op3 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 3, |stack: &Stack<'a>| {
                let shift = stack.len().saturating_sub(3);
                Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?))
            });
        }
    }

    // Operator with exactly four operands.
    macro_rules! op4 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 4, |stack: &Stack<'a>| {
                let shift = stack.len().saturating_sub(4);
                Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?, stack.get(3 + shift)?))
            });
        }
    }

    // Operator with exactly six operands.
    macro_rules! op6 {
        ($t:ident $(<$l:lifetime>),*, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$l>),*, $e, 6, |stack: &Stack<'a>| {
                let shift = stack.len().saturating_sub(6);
                Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?, stack.get(3 + shift)?,
                stack.get(4 + shift)?, stack.get(5 + shift)?))
            });
        }
    }

    // Re-export the macros so that they can be used by path from other
    // modules of the crate.
    pub(crate) use op_all;
    pub(crate) use op_impl;
    pub(crate) use op0;
    pub(crate) use op1;
    pub(crate) use op2;
    pub(crate) use op3;
    pub(crate) use op4;
    pub(crate) use op6;
}