// The `ops` submodule provides `TypedInstruction` (imported below); the
// `missing_docs` lint is suppressed for that module.
#[allow(missing_docs)]
pub mod ops;
use crate::content::ops::TypedInstruction;
use crate::object::Stream;
use crate::object::dict::InlineImageDict;
use crate::object::name::{Name, skip_name_like};
use crate::object::{Object, ObjectLike};
use crate::reader::{Readable, Reader, ReaderContext, Skippable};
use log::warn;
use smallvec::SmallVec;
use std::fmt::{Debug, Formatter};
use std::ops::Deref;
// Array length used for the `SmallVec` backing `Stack` and for the `SmallVec`
// returned by `Stack::get_all`.
const OPERANDS_THRESHOLD: usize = 6;
impl Debug for Operator<'_> {
    /// Writes the operator's name as a string, with no surrounding decoration.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let name = self.0.as_str();
        write!(f, "{}", name)
    }
}
/// An operator of a content stream, stored as a [`Name`] wrapping the operator's bytes.
#[derive(Clone, PartialEq)]
pub struct Operator<'a>(Name<'a>);
impl Deref for Operator<'_> {
    type Target = [u8];

    /// Gives access to the operator as a byte slice by borrowing from the wrapped name.
    fn deref(&self) -> &[u8] {
        self.0.as_ref()
    }
}
impl Skippable for Operator<'_> {
    /// Advances `r` past one operator using `skip_name_like`.
    ///
    /// The boolean argument is ignored. Returns `None` if `skip_name_like` fails.
    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
        skip_name_like(r, false)?;
        Some(())
    }
}
impl<'a> Readable<'a> for Operator<'a> {
    /// Reads one operator starting at the reader's current offset.
    ///
    /// The reader is advanced with `skip_name_like`, and the bytes that were
    /// skipped become the operator. The reader context is not used.
    ///
    /// Returns `None` if skipping fails, if the skipped range cannot be
    /// retrieved from the reader, or if no bytes were consumed.
    fn read(r: &mut Reader<'a>, _: &ReaderContext) -> Option<Self> {
        let start = r.offset();
        skip_name_like(r, false)?;
        let end = r.offset();

        // Propagate a failed range lookup as `None` rather than panicking, in
        // line with how every other failure in this parser is reported.
        let data = r.range(start..end)?;

        if data.is_empty() {
            return None;
        }

        Some(Operator(Name::from_unescaped(data)))
    }
}
/// An iterator over the instructions of a content stream, yielding each operator
/// together with its operands as an untyped [`Instruction`].
#[derive(Clone)]
pub struct UntypedIter<'a> {
// Cursor over the content stream bytes.
reader: Reader<'a>,
// Operands collected for the instruction currently being read; cleared at the
// start of every `next` call.
stack: Stack<'a>,
}
impl<'a> UntypedIter<'a> {
pub fn new(data: &'a [u8]) -> UntypedIter<'a> {
Self {
reader: Reader::new(data),
stack: Stack::new(),
}
}
pub fn empty() -> UntypedIter<'a> {
Self {
reader: Reader::new(&[]),
stack: Stack::new(),
}
}
}
impl<'a> Iterator for UntypedIter<'a> {
    type Item = Instruction<'a>;

    /// Reads operands until an operator is reached and returns them together
    /// as one [`Instruction`].
    ///
    /// Returns `None` when the input is exhausted, or when an operand, an
    /// operator, or the pieces of an inline image cannot be read. If the
    /// operator itself cannot be read, the reader is moved to the end so that
    /// later calls return `None` as well.
    ///
    /// The `BI` operator is special-cased: the inline image dictionary and the
    /// raw image bytes up to the terminating `EI` are read here, and the image
    /// is pushed onto the operand stack as an [`Object::Stream`].
    fn next(&mut self) -> Option<Self::Item> {
        // Operands never carry over from one instruction to the next.
        self.stack.clear();
        self.reader.skip_white_spaces_and_comments();

        while !self.reader.at_end() {
            // These leading bytes mark an operand (read as an `Object`);
            // anything else is treated as the start of an operator.
            if matches!(
                self.reader.peek_byte()?,
                b'/' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'[' | b'<' | b'('
            ) {
                self.stack
                    .push(self.reader.read_without_context::<Object>()?);
            } else {
                let operator = match self.reader.read_without_context::<Operator>() {
                    Some(o) => o,
                    None => {
                        warn!("failed to read operator in content stream");
                        self.reader.jump_to_end();
                        return None;
                    }
                };

                if operator.as_ref() == b"BI" {
                    // NOTE(review): reading the dictionary presumably also consumes
                    // the operator that introduces the image data — confirm against
                    // `InlineImageDict`.
                    let inline_dict = self.reader.read_without_context::<InlineImageDict>()?;
                    let dict = inline_dict.get_dict().clone();

                    // Exactly one white space separates the dictionary from the data.
                    self.reader.read_white_space()?;

                    let stream_data = self.reader.tail()?;
                    let start_offset = self.reader.offset();

                    // Scan forward byte by byte for an `EI` that ends the image data.
                    'outer: while let Some(bytes) = self.reader.peek_bytes(2) {
                        if bytes == b"EI" {
                            let end_offset = self.reader.offset() - start_offset;
                            let image_data = &stream_data[..end_offset];
                            let stream = Stream::from_raw(image_data, dict.clone());

                            // The image data itself may contain the bytes `EI`, so this
                            // candidate is only trusted after looking at what follows it.
                            // `peek_bytes(2)` succeeded, so the tail has at least two bytes.
                            let tail = &self.reader.tail()?[2..];
                            let mut find_reader = Reader::new(tail);

                            while let Some(bytes) = find_reader.peek_bytes(2) {
                                if bytes == b"EI" {
                                    // Another `EI` follows. If the remaining data contains
                                    // non-ASCII bytes, assume we are still inside the image
                                    // data and keep looking from after this candidate.
                                    if !tail.is_ascii() {
                                        self.reader.read_bytes(2)?;
                                        continue 'outer;
                                    }

                                    // Otherwise try to interpret the remainder as a content
                                    // stream. It is accepted if 20 instructions parse, or if
                                    // a text-showing instruction is found before that.
                                    let found =
                                        TypedIter::new(tail).enumerate().any(|(counter, op)| {
                                            counter >= 20
                                                || matches!(
                                                    op,
                                                    TypedInstruction::NextLineAndShowText(_)
                                                        | TypedInstruction::ShowText(_)
                                                        | TypedInstruction::ShowTexts(_)
                                                        | TypedInstruction::ShowTextWithParameters(_)
                                                )
                                        });

                                    if !found {
                                        // The remainder does not look like a content stream,
                                        // so this candidate is rejected.
                                        self.reader.read_bytes(2)?;
                                        continue 'outer;
                                    }

                                    // Both checks above inspect all of `tail` and do not
                                    // depend on the position of `find_reader`, so any later
                                    // `EI` would yield the same verdict. Stop scanning here
                                    // instead of repeating the whole validation for each.
                                    break;
                                } else if bytes == b"BI" {
                                    // If another inline image dictionary can be read here,
                                    // the candidate `EI` is accepted as the end of the data.
                                    let mut cloned = find_reader.clone();
                                    cloned.read_bytes(2)?;
                                    if cloned.read_without_context::<InlineImageDict>().is_some() {
                                        break;
                                    }
                                }

                                find_reader.read_byte()?;
                            }

                            self.stack.push(Object::Stream(stream));

                            // Consume the accepted `EI` and any white space after it.
                            self.reader.read_bytes(2)?;
                            self.reader.skip_white_spaces();

                            break;
                        } else {
                            self.reader.read_byte()?;
                        }
                    }
                }

                // Hand the operands over without cloning them; the stack is
                // cleared at the start of the next call anyway.
                return Some(Instruction {
                    operands: std::mem::take(&mut self.stack),
                    operator,
                });
            }

            self.reader.skip_white_spaces_and_comments();
        }

        None
    }
}
/// An iterator over the instructions of a content stream that converts each
/// untyped instruction into a [`TypedInstruction`].
#[derive(Clone)]
pub struct TypedIter<'a> {
// The underlying iterator that produces the untyped instructions.
untyped: UntypedIter<'a>,
}
impl<'a> TypedIter<'a> {
pub fn new(data: &'a [u8]) -> TypedIter<'a> {
Self {
untyped: UntypedIter::new(data),
}
}
pub(crate) fn from_untyped(untyped: UntypedIter<'a>) -> TypedIter<'a> {
Self { untyped }
}
}
impl<'a> Iterator for TypedIter<'a> {
    type Item = TypedInstruction<'a>;

    /// Fetches the next untyped instruction and dispatches it to its typed form.
    ///
    /// Returns `None` when the underlying iterator is exhausted or when
    /// `TypedInstruction::dispatch` returns `None` for the instruction.
    fn next(&mut self) -> Option<Self::Item> {
        let instruction = self.untyped.next()?;
        TypedInstruction::dispatch(&instruction)
    }
}
/// A single content stream instruction: an operator and the operands read before it.
pub struct Instruction<'a> {
/// The operands that were read before the operator.
pub operands: Stack<'a>,
/// The operator of the instruction.
pub operator: Operator<'a>,
}
impl<'a> Instruction<'a> {
pub fn operands(self) -> OperandIterator<'a> {
OperandIterator::new(self.operands)
}
}
/// The operands of an instruction, kept in the order in which they were pushed.
#[derive(Debug, Clone, PartialEq)]
pub struct Stack<'a>(SmallVec<[Object<'a>; OPERANDS_THRESHOLD]>);
impl<'a> Default for Stack<'a> {
fn default() -> Self {
Self::new()
}
}
impl<'a> Stack<'a> {
pub fn new() -> Self {
Self(SmallVec::new())
}
fn push(&mut self, operand: Object<'a>) {
self.0.push(operand);
}
fn clear(&mut self) {
self.0.clear();
}
fn len(&self) -> usize {
self.0.len()
}
fn get<T>(&self, index: usize) -> Option<T>
where
T: ObjectLike<'a>,
{
self.0.get(index).and_then(|e| e.clone().cast::<T>())
}
fn get_all<T>(&self) -> Option<SmallVec<[T; OPERANDS_THRESHOLD]>>
where
T: ObjectLike<'a>,
{
let mut operands = SmallVec::new();
for op in &self.0 {
let converted = op.clone().cast::<T>()?;
operands.push(converted);
}
Some(operands)
}
}
/// An iterator over the operands of an instruction; it owns the operand stack.
pub struct OperandIterator<'a> {
// The operands being iterated over.
stack: Stack<'a>,
// Index of the operand that the next call to `next` will return.
cur_index: usize,
}
impl<'a> OperandIterator<'a> {
fn new(stack: Stack<'a>) -> Self {
Self {
stack,
cur_index: 0,
}
}
}
impl<'a> Iterator for OperandIterator<'a> {
    type Item = Object<'a>;

    /// Returns a clone of the operand at the current index and advances past it.
    ///
    /// Returns `None`, without advancing, once `Stack::get` yields nothing for
    /// the current index.
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.stack.get::<Object<'a>>(self.cur_index)?;
        self.cur_index += 1;

        Some(item)
    }
}
/// Implemented by every typed operator.
///
/// An implementor can be converted into a [`TypedInstruction`] and fallibly
/// extracted from one again.
pub(crate) trait OperatorTrait<'a>:
    Sized + Into<TypedInstruction<'a>> + TryFrom<TypedInstruction<'a>>
{
    /// The operator's textual name; the generated implementations use it in
    /// their log messages.
    const OPERATOR: &'static str;

    /// Builds the operator from the operands on `stack`, or returns `None` if
    /// they cannot be converted.
    fn from_stack(stack: &Stack<'a>) -> Option<Self>;
}
// Helper macros that generate the `OperatorTrait` implementation and the
// `TypedInstruction` conversions for an operator type.
mod macros {
// Core macro used by all the others. Arguments:
// - `$t` (optionally followed by a lifetime): the operator type, which must
//   also be the name of the matching `TypedInstruction` variant.
// - `$e`: the value of `OperatorTrait::OPERATOR`.
// - `$n`: the expected number of operands; `u8::MAX as usize` disables the check.
// - `$body`: a callable taking `&Stack` and returning `Option<Self>`.
macro_rules! op_impl {
($t:ident $(<$l:lifetime>),*, $e:expr, $n:expr, $body:expr) => {
impl<'a> OperatorTrait<'a> for $t$(<$l>),* {
const OPERATOR: &'static str = $e;
fn from_stack(stack: &Stack<'a>) -> Option<Self> {
// A wrong operand count is only logged; conversion is still attempted.
if $n != u8::MAX as usize {
if stack.len() != $n {
warn!("wrong stack length {} for operator {}, expected {}", stack.len(), Self::OPERATOR, $n);
}
}
// Log a failed conversion and pass the `None` on to the caller.
$body(stack).or_else(|| {
warn!("failed to convert operands for operator {}", Self::OPERATOR);
None
})
}
}
// Wraps the operator in its `TypedInstruction` variant.
impl<'a> From<$t$(<$l>),*> for TypedInstruction<'a> {
fn from(value: $t$(<$l>),*) -> Self {
TypedInstruction::$t(value)
}
}
// Extracts the operator again; any other variant yields `Err(())`.
impl<'a> TryFrom<TypedInstruction<'a>> for $t$(<$l>),* {
type Error = ();
fn try_from(value: TypedInstruction<'a>) -> std::result::Result<Self, Self::Error> {
match value {
TypedInstruction::$t(e) => Ok(e),
_ => Err(())
}
}
}
};
}
// Operator without operands: the stack contents are ignored.
macro_rules! op0 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 0, |_| Some(Self));
}
}
// In `op1` through `op6`, `shift` is the number of surplus operands on the
// stack (0 when the count is as expected). Reading from `shift` onwards means
// that, with too many operands, the last N pushed are the ones used. With too
// few operands, `shift` is 0 and the out-of-range `get` makes the conversion
// return `None`.
// Operator with one operand.
macro_rules! op1 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 1, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(1);
Some(Self(stack.get(0 + shift)?))
});
}
}
// Operator that takes every operand on the stack; `u8::MAX as usize` turns
// off the operand count check in `op_impl`.
macro_rules! op_all {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, u8::MAX as usize, |stack: &Stack<'a>|
Some(Self(stack.get_all()?)));
}
}
// Operator with two operands.
macro_rules! op2 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 2, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(2);
Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?))
});
}
}
// Operator with three operands.
macro_rules! op3 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 3, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(3);
Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
stack.get(2 + shift)?))
});
}
}
// Operator with four operands.
macro_rules! op4 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 4, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(4);
Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
stack.get(2 + shift)?, stack.get(3 + shift)?))
});
}
}
// Operator with six operands.
macro_rules! op6 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 6, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(6);
Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
stack.get(2 + shift)?, stack.get(3 + shift)?,
stack.get(4 + shift)?, stack.get(5 + shift)?))
});
}
}
// Re-export the macros so they can be used from elsewhere in the crate.
pub(crate) use op_all;
pub(crate) use op_impl;
pub(crate) use op0;
pub(crate) use op1;
pub(crate) use op2;
pub(crate) use op3;
pub(crate) use op4;
pub(crate) use op6;
}