#[allow(missing_docs)]
pub mod ops;
use crate::content::ops::TypedInstruction;
use crate::object;
use crate::object::dict::InlineImageDict;
use crate::object::name::{Name, skip_name_like};
use crate::object::{Array, Null, Number, Object, Stream};
use crate::reader::Reader;
use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
use crate::trivia::is_white_space_character;
use crate::util::find_needle;
use core::array;
use core::fmt::{Debug, Formatter};
use core::ops::Deref;
use smallvec::SmallVec;
/// Maximum number of operands a [`Stack`] can hold; also the inline capacity of the
/// `SmallVec` returned by `Stack::get_all`.
const OPERANDS_THRESHOLD: usize = 10;
impl Debug for Operator<'_> {
    /// Writes the string form of the wrapped name, without any surrounding decoration.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let name = self.0.as_str();
        f.write_fmt(format_args!("{}", name))
    }
}
/// An operator of a content stream, stored as the [`Name`] built from its raw bytes.
#[derive(Clone, PartialEq)]
pub struct Operator<'a>(Name<'a>);
impl Deref for Operator<'_> {
    type Target = [u8];

    /// Exposes the bytes of the wrapped name.
    fn deref(&self) -> &Self::Target {
        let bytes: &[u8] = self.0.as_ref();
        bytes
    }
}
impl Skippable for Operator<'_> {
    /// Advances `r` past one name-like token; returns `None` if `skip_name_like` fails.
    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
        skip_name_like(r, false)?;
        Some(())
    }
}
impl<'a> Readable<'a> for Operator<'a> {
    /// Reads one name-like token from `r` and wraps its bytes in an [`Operator`].
    ///
    /// Returns `None` if skipping fails, if the token is empty, or if `Name::new`
    /// rejects the bytes.
    fn read(r: &mut Reader<'a>, _: &ReaderContext<'a>) -> Option<Self> {
        let begin = r.offset();
        skip_name_like(r, false)?;
        let finish = r.offset();

        let bytes = r.range(begin..finish)?;
        if bytes.is_empty() {
            None
        } else {
            Name::new(bytes).map(Self)
        }
    }
}
/// An iterator over the untyped instructions (operands plus operator) of a content stream.
#[derive(Clone)]
pub struct UntypedIter<'a> {
    // Cursor over the content stream bytes.
    reader: Reader<'a>,
    // Operands collected for the instruction currently being read; reset on every `next`.
    stack: Stack<'a>,
    // Operator of the most recently yielded instruction; the yielded `Instruction` borrows it.
    operator: Option<Operator<'a>>,
}
impl<'a> UntypedIter<'a> {
    /// Creates an iterator over the instructions encoded in `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self {
            reader: Reader::new(data),
            stack: Stack::new(),
            operator: None,
        }
    }

    /// Creates an iterator over an empty byte slice.
    pub fn empty() -> Self {
        Self {
            reader: Reader::new(&[]),
            stack: Stack::new(),
            operator: None,
        }
    }

    /// Reads the next instruction: zero or more operands followed by one operator.
    ///
    /// The returned [`Instruction`] borrows the iterator's internal operand stack and
    /// operator, which is why this is an inherent method rather than an `Iterator` impl.
    ///
    /// For the `BI` operator the inline image (dictionary plus raw data up to the matching
    /// `EI`) is pushed onto the stack as a single [`Object::Stream`] operand.
    ///
    /// Returns `None` when the data is exhausted, when a token can be read neither as an
    /// object nor as an operator, when the operand stack overflows, or when an inline
    /// image is malformed.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Option<Instruction<'_, 'a>> {
        self.stack.clear();
        self.operator = None;
        self.reader.skip_white_spaces_and_comments();

        while !self.reader.at_end() {
            // These leading bytes are treated as the start of an operand object.
            if matches!(
                self.reader.peek_byte()?,
                b'/' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'[' | b'<' | b'('
            ) {
                if let Some(object) = self.reader.read_without_context::<Object<'_>>() {
                    self.stack.push(object)?;
                } else if self.reader.read_without_context::<Operator<'_>>().is_some() {
                    // Not an object, but readable as an operator: drop it together with
                    // the operands gathered so far and keep going.
                    self.stack.clear();
                } else {
                    return None;
                }
            } else {
                let operator = match self.reader.read_without_context::<Operator<'_>>() {
                    Some(o) => o,
                    None => {
                        warn!("failed to read operator in content stream");
                        self.reader.jump_to_end();
                        return None;
                    }
                };

                if operator.as_ref() == b"BI" {
                    // NOTE(review): presumably reading the dictionary also consumes the
                    // keyword that introduces the image data — confirm in `InlineImageDict`.
                    let inline_dict = self.reader.read_without_context::<InlineImageDict<'_>>()?;
                    let dict = inline_dict.get_dict().clone();

                    // White space is required between the dictionary and the image data.
                    self.reader.read_white_space()?;

                    let stream_data = self.reader.tail()?;
                    let start_offset = self.reader.offset();

                    // Each iteration examines one `EI` candidate; `continue 'outer` rejects
                    // the candidate and searches for the next one.
                    'outer: while let Some(pos) = find_needle(self.reader.tail()?, b"EI") {
                        self.reader.read_bytes(pos)?;

                        if self.reader.peek_bytes(2) == Some(b"EI") {
                            // An `EI` that is not at the very end must be followed by white
                            // space, otherwise it is part of the image data.
                            if self
                                .reader
                                .peek_bytes(3)
                                .is_some_and(|b| !is_white_space_character(b[2]))
                            {
                                // Skip only the two bytes of the rejected candidate: the
                                // byte after it may itself start the real terminator
                                // (e.g. `EIEI `), so it must not be consumed here.
                                self.reader.read_bytes(2)?;
                                continue;
                            }

                            let end_offset = self.reader.offset() - start_offset;
                            let image_data = &stream_data[..end_offset];
                            let stream = Stream::new(image_data, dict.clone());

                            // Look at what follows the candidate to decide whether it really
                            // terminates the image or is just part of the image data.
                            let tail = &self.reader.tail()?[2..];
                            let mut find_reader = Reader::new(tail);

                            while !find_reader.at_end() {
                                let remaining = find_reader.tail()?;
                                let next_ei = find_needle(remaining, b"EI");
                                let next_bi = find_needle(remaining, b"BI");

                                // Jump to whichever of `EI` / `BI` comes first.
                                let (next_pos, is_ei) = match (next_ei, next_bi) {
                                    (Some(ei), Some(bi)) if ei <= bi => (ei, true),
                                    (Some(_), Some(bi)) => (bi, false),
                                    (Some(ei), None) => (ei, true),
                                    (None, Some(bi)) => (bi, false),
                                    (None, None) => break,
                                };
                                find_reader.read_bytes(next_pos)?;

                                if is_ei {
                                    // Non-ASCII bytes before a later `EI` mean we are still
                                    // inside image data, so the candidate is rejected.
                                    let analyze_data = &tail[..find_reader.offset()];
                                    if analyze_data.iter().any(|c| !c.is_ascii()) {
                                        self.reader.read_bytes(2)?;
                                        continue 'outer;
                                    }

                                    // Otherwise try to interpret the tail as a content
                                    // stream: a text-showing instruction, or more than 20
                                    // parsed instructions, confirms the candidate.
                                    let mut iter = TypedIter::new(tail);
                                    let mut found = false;
                                    let mut counter = 0;

                                    while let Some(op) = iter.next() {
                                        if counter >= 20 {
                                            found = true;
                                            break;
                                        }

                                        if matches!(
                                            op,
                                            TypedInstruction::NextLineAndShowText(_)
                                                | TypedInstruction::ShowText(_)
                                                | TypedInstruction::ShowTexts(_)
                                                | TypedInstruction::ShowTextWithParameters(_)
                                        ) {
                                            found = true;
                                            break;
                                        }

                                        counter += 1;
                                    }

                                    if !found {
                                        self.reader.read_bytes(2)?;
                                        continue 'outer;
                                    }
                                } else {
                                    // A `BI` followed by a readable inline image dictionary
                                    // starts another image, which confirms the candidate.
                                    let mut cloned = find_reader.clone();
                                    cloned.read_bytes(2)?;
                                    if cloned
                                        .read_without_context::<InlineImageDict<'_>>()
                                        .is_some()
                                    {
                                        break;
                                    }
                                }

                                find_reader.read_byte()?;
                            }

                            // Candidate accepted: emit the image and step over `EI`.
                            self.stack.push(Object::Stream(stream))?;
                            self.reader.read_bytes(2)?;
                            self.reader.skip_white_spaces();
                            break;
                        }
                    }
                }

                // Store the operator in `self` so the returned instruction can borrow it.
                let operator = self.operator.insert(operator);
                return Some(Instruction {
                    operands: &self.stack,
                    operator,
                });
            }

            self.reader.skip_white_spaces_and_comments();
        }

        None
    }
}
/// An iterator over the typed instructions of a content stream, built on [`UntypedIter`].
#[derive(Clone)]
pub struct TypedIter<'a> {
    // Source of untyped instructions that are dispatched into `TypedInstruction`s.
    untyped: UntypedIter<'a>,
}
impl<'a> TypedIter<'a> {
    /// Creates a typed iterator over the instructions encoded in `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self::from_untyped(UntypedIter::new(data))
    }

    /// Wraps an existing untyped iterator, continuing from its current position.
    pub(crate) fn from_untyped(untyped: UntypedIter<'a>) -> Self {
        Self { untyped }
    }

    /// Reads the next untyped instruction and converts it into a [`TypedInstruction`].
    ///
    /// If dispatching fails, iteration stops (`None`) when the operator is one of
    /// `m`, `l`, `c`, `v`, `y`, `h` or `re`; any other operator is yielded as
    /// `TypedInstruction::Fallback`.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Option<TypedInstruction<'_, 'a>> {
        let instruction = self.untyped.next()?;
        if let Some(typed) = TypedInstruction::dispatch(&instruction) {
            return Some(typed);
        }

        // Operators for which a failed conversion aborts iteration altogether.
        let aborting: [&[u8]; 7] = [b"m", b"l", b"c", b"v", b"y", b"h", b"re"];
        let raw = instruction.operator.0.deref();
        if aborting.contains(&raw) {
            None
        } else {
            Some(TypedInstruction::Fallback(instruction.operator))
        }
    }
}
/// An untyped instruction: an operator together with the operands read before it.
///
/// Both fields borrow from the [`UntypedIter`] that produced the instruction.
pub struct Instruction<'b, 'a> {
    /// The operands collected for this instruction.
    pub operands: &'b Stack<'a>,
    /// The operator of this instruction.
    pub operator: &'b Operator<'a>,
}
impl<'b, 'a> Instruction<'b, 'a> {
pub fn operands(&self) -> OperandIterator<'b, 'a> {
OperandIterator::new(self.operands)
}
}
/// A fixed-capacity stack holding the operands of an instruction.
pub struct Stack<'a> {
    // Backing storage; only the first `len` slots hold live operands.
    data: [Object<'a>; OPERANDS_THRESHOLD],
    // Number of operands currently on the stack.
    len: usize,
}
impl<'a> Default for Stack<'a> {
fn default() -> Self {
Self::new()
}
}
impl<'a> Stack<'a> {
    /// Creates an empty stack; every backing slot starts out as `Object::Null`.
    pub fn new() -> Self {
        let data = array::from_fn(|_| Object::Null(Null));
        Self { data, len: 0 }
    }

    /// Appends `operand`, or returns `None` (discarding it) when the stack is full.
    fn push(&mut self, operand: Object<'a>) -> Option<()> {
        // `data` has exactly `OPERANDS_THRESHOLD` slots, so a missing slot means "full".
        let slot = self.data.get_mut(self.len)?;
        *slot = operand;
        self.len += 1;
        Some(())
    }

    /// Empties the stack. Old values stay in the backing array until overwritten.
    fn clear(&mut self) {
        self.len = 0;
    }

    /// Number of operands currently on the stack.
    fn len(&self) -> usize {
        self.len
    }

    /// The live operands, in push order.
    fn as_slice(&self) -> &[Object<'a>] {
        let (live, _unused) = self.data.split_at(self.len);
        live
    }

    /// Converts the operand at `index` into `T`.
    ///
    /// Returns `None` if `index` is out of range or the conversion fails.
    fn get<'b, T>(&'b self, index: usize) -> Option<T>
    where
        T: Operand<'b, 'a>,
    {
        let object = self.as_slice().get(index)?;
        T::from_object(object)
    }

    /// Converts every operand into `T`; returns `None` as soon as one conversion fails.
    fn get_all<'b, T>(&'b self) -> Option<SmallVec<[T; OPERANDS_THRESHOLD]>>
    where
        T: Operand<'b, 'a>,
    {
        self.as_slice().iter().map(T::from_object).collect()
    }
}
impl Debug for Stack<'_> {
    /// Formats the live operands as a list.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let mut list = f.debug_list();
        for operand in self.as_slice() {
            list.entry(operand);
        }
        list.finish()
    }
}
impl Clone for Stack<'_> {
    /// Clones only the live operands; unused slots of the copy stay `Object::Null`.
    fn clone(&self) -> Self {
        let mut copy = Self::new();
        // Both stacks share the same capacity, so every live operand has a slot.
        for (slot, operand) in copy.data.iter_mut().zip(self.as_slice()) {
            *slot = operand.clone();
        }
        copy.len = self.len;
        copy
    }
}
impl PartialEq for Stack<'_> {
    /// Two stacks are equal when their live operands are equal; stale slots are ignored.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.as_slice(), other.as_slice());
        lhs == rhs
    }
}
/// A type that can be extracted from an operand stored on the [`Stack`].
trait Operand<'b, 'a>: Sized {
    /// Converts `object` into `Self`, returning `None` if the object is not suitable.
    fn from_object(object: &'b Object<'a>) -> Option<Self>;
}
impl<'b, 'a> Operand<'b, 'a> for Number {
    /// Copies the number out of an `Object::Number`; any other variant yields `None`.
    fn from_object(object: &'b Object<'a>) -> Option<Self> {
        if let Object::Number(number) = object {
            Some(*number)
        } else {
            None
        }
    }
}
impl<'b, 'a> Operand<'b, 'a> for &'b object::String<'a> {
    /// Borrows the string of an `Object::String`; any other variant yields `None`.
    fn from_object(object: &'b Object<'a>) -> Option<Self> {
        if let Object::String(string) = object {
            Some(string)
        } else {
            None
        }
    }
}
impl<'b, 'a> Operand<'b, 'a> for &'b Name<'a> {
    /// Borrows the name of an `Object::Name`; any other variant yields `None`.
    fn from_object(object: &'b Object<'a>) -> Option<Self> {
        if let Object::Name(name) = object {
            Some(name)
        } else {
            None
        }
    }
}
impl<'b, 'a> Operand<'b, 'a> for &'b Array<'a> {
    /// Borrows the array of an `Object::Array`; any other variant yields `None`.
    fn from_object(object: &'b Object<'a>) -> Option<Self> {
        if let Object::Array(array) = object {
            Some(array)
        } else {
            None
        }
    }
}
impl<'b, 'a> Operand<'b, 'a> for &'b Stream<'a> {
    /// Borrows the stream of an `Object::Stream`; any other variant yields `None`.
    fn from_object(object: &'b Object<'a>) -> Option<Self> {
        if let Object::Stream(stream) = object {
            Some(stream)
        } else {
            None
        }
    }
}
impl<'b, 'a> Operand<'b, 'a> for &'b Object<'a> {
    /// Accepts any object unchanged; this conversion never fails.
    fn from_object(object: &'b Object<'a>) -> Option<Self> {
        Some(object)
    }
}
/// An iterator over the operands of an instruction.
pub struct OperandIterator<'b, 'a> {
    // The stack whose live operands are iterated.
    stack: &'b Stack<'a>,
    // Index of the operand that the next call to `next` will return.
    cur_index: usize,
}
impl<'b, 'a> OperandIterator<'b, 'a> {
fn new(stack: &'b Stack<'a>) -> Self {
Self {
stack,
cur_index: 0,
}
}
}
impl<'b, 'a> Iterator for OperandIterator<'b, 'a> {
    type Item = &'b Object<'a>;

    /// Yields the operand at the current index and advances; `None` once all live
    /// operands have been returned.
    fn next(&mut self) -> Option<Self::Item> {
        let operand = self.stack.as_slice().get(self.cur_index)?;
        self.cur_index += 1;
        Some(operand)
    }
}
/// A typed operator that can be built from the operands on a [`Stack`].
pub(crate) trait OperatorTrait<'b, 'a>: Sized {
    /// String identifying the operator; used in the conversion-failure warning.
    const OPERATOR: &'static str;
    /// Builds the operator from `stack`, returning `None` if the operands do not fit.
    fn from_stack(stack: &'b Stack<'a>) -> Option<Self>;
}
// Helper macros that implement `OperatorTrait` and the conversions to and from
// `TypedInstruction` for typed operator structs.
mod macros {
    // Core macro. Arguments: the operator type (optionally with lifetimes), its operator
    // string, an operand count (accepted for documentation at the call site, not used in
    // the expansion) and a closure-like `|stack: Type| { body }` producing `Option<Self>`.
    macro_rules! op_impl {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr, $n:expr, |$stack:ident : $stack_ty:ty| $body:block) => {
            impl<'b, 'a> OperatorTrait<'b, 'a> for $t$(<$($l),+>)? {
                const OPERATOR: &'static str = $e;

                // The body runs inside a closure so that an early `?` exit within it
                // yields `None` here instead of returning from `from_stack` directly,
                // which would silently skip the warning below.
                #[inline(always)]
                #[allow(clippy::redundant_closure_call)]
                fn from_stack($stack: $stack_ty) -> Option<Self> {
                    let converted: Option<Self> = (|| $body)();
                    converted.or_else(|| {
                        warn!("failed to convert operands for operator {}", Self::OPERATOR);
                        None
                    })
                }
            }

            impl<'b, 'a> From<$t$(<$($l),+>)?> for TypedInstruction<'b, 'a> {
                fn from(value: $t$(<$($l),+>)?) -> Self {
                    TypedInstruction::$t(value)
                }
            }

            impl<'b, 'a> TryFrom<TypedInstruction<'b, 'a>> for $t$(<$($l),+>)? {
                type Error = ();

                fn try_from(value: TypedInstruction<'b, 'a>) -> core::result::Result<Self, Self::Error> {
                    match value {
                        TypedInstruction::$t(e) => Ok(e),
                        _ => Err(())
                    }
                }
            }
        };
    }

    // Operator without operands: conversion always succeeds, the stack is ignored.
    macro_rules! op0 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 0, |_stack: &'b Stack<'a>| {
                Some(Self)
            });
        }
    }

    // Operator with one operand. As in the other fixed-arity macros, operands are taken
    // from the end of the stack: `shift` skips any surplus operands at the front.
    macro_rules! op1 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 1, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(1);
                Some(Self(stack.get(shift)?))
            });
        }
    }

    // Operator that takes every operand on the stack, converted to one common type.
    macro_rules! op_all {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, u8::MAX as usize, |stack: &'b Stack<'a>| {
                Some(Self(stack.get_all()?))
            });
        }
    }

    // Operator with two operands (the last two on the stack).
    macro_rules! op2 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 2, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(2);
                Some(Self(stack.get(shift)?, stack.get(1 + shift)?))
            });
        }
    }

    // Operator with three operands (the last three on the stack).
    macro_rules! op3 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 3, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(3);
                Some(Self(stack.get(shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?))
            });
        }
    }

    // Operator with four operands (the last four on the stack).
    macro_rules! op4 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 4, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(4);
                Some(Self(stack.get(shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?, stack.get(3 + shift)?))
            });
        }
    }

    // Operator with six operands (the last six on the stack).
    macro_rules! op6 {
        ($t:ident $(<$($l:lifetime),+>)?, $e:expr) => {
            crate::content::macros::op_impl!($t$(<$($l),+>)?, $e, 6, |stack: &'b Stack<'a>| {
                let shift = stack.len().saturating_sub(6);
                Some(Self(stack.get(shift)?, stack.get(1 + shift)?,
                stack.get(2 + shift)?, stack.get(3 + shift)?,
                stack.get(4 + shift)?, stack.get(5 + shift)?))
            });
        }
    }

    pub(crate) use op_all;
    pub(crate) use op_impl;
    pub(crate) use op0;
    pub(crate) use op1;
    pub(crate) use op2;
    pub(crate) use op3;
    pub(crate) use op4;
    pub(crate) use op6;
}