#[allow(missing_docs)]
pub mod ops;
use crate::content::ops::TypedInstruction;
use crate::object::Stream;
use crate::object::dict::InlineImageDict;
use crate::object::dict::keys::{
ASCII85_DECODE_ABBREVIATION, BITS_PER_COMPONENT, BPC, COLORSPACE, CS, F, FILTER, H, HEIGHT, IM,
IMAGE_MASK, W, WIDTH,
};
use crate::object::name::{Name, skip_name_like};
use crate::object::{Array, Number, Object, ObjectLike};
use crate::reader::Reader;
use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
use core::fmt::{Debug, Formatter};
use core::ops::Deref;
use log::warn;
use smallvec::SmallVec;
/// Inline capacity of the `SmallVec` that backs a [`Stack`] (and of the
/// vectors returned by `Stack::get_all`).
const OPERANDS_THRESHOLD: usize = 6;
/// Computes the byte length of the sample data of an unfiltered inline image.
///
/// The size is derived from the width, height, bits-per-component and color
/// space entries of `dict`; both the abbreviated and the full key names are
/// consulted. Each row is rounded up to a whole number of bytes.
///
/// Returns `None` if
/// - the dictionary has a filter entry (name or array) under either key,
/// - the width or the height is missing,
/// - the image is not a mask and its color space is missing or is not one of
///   the recognized gray / RGB / CMYK / indexed names, or
/// - the size computation would overflow `usize`.
fn compute_raw_inline_image_size(dict: &crate::object::dict::Dict<'_>) -> Option<usize> {
    let has_filter = dict.get::<Name>(F).is_some()
        || dict.get::<Name>(FILTER).is_some()
        || dict.get::<Array>(F).is_some()
        || dict.get::<Array>(FILTER).is_some();
    if has_filter {
        return None;
    }

    let is_image_mask =
        dict.get::<bool>(IM).unwrap_or(false) || dict.get::<bool>(IMAGE_MASK).unwrap_or(false);

    // Float-to-integer `as` casts saturate, so negative or NaN values become 0
    // and huge values become `usize::MAX`; the checked arithmetic below deals
    // with the latter.
    let w = dict
        .get::<Number>(W)
        .or_else(|| dict.get::<Number>(WIDTH))?
        .as_f64() as usize;
    let h = dict
        .get::<Number>(H)
        .or_else(|| dict.get::<Number>(HEIGHT))?
        .as_f64() as usize;

    let (bpc, components): (usize, usize) = if is_image_mask {
        // Masks are always treated as one bit per sample, one sample per pixel.
        (1, 1)
    } else {
        let bpc = dict
            .get::<Number>(BPC)
            .or_else(|| dict.get::<Number>(BITS_PER_COMPONENT))
            .map(|n| n.as_f64() as usize)
            .unwrap_or(8);
        let color_space = dict
            .get::<Name>(CS)
            .or_else(|| dict.get::<Name>(COLORSPACE));
        let components = match color_space.as_deref() {
            Some(b"G") | Some(b"DeviceGray") | Some(b"I") | Some(b"Indexed") => 1,
            Some(b"RGB") | Some(b"DeviceRGB") => 3,
            Some(b"CMYK") | Some(b"DeviceCMYK") => 4,
            // Unknown or missing color space: the component count is unknown.
            _ => return None,
        };
        (bpc, components)
    };

    // The dimensions come from untrusted input, so every multiplication is
    // checked; an overflow means the size cannot be relied upon.
    let bits_per_row = w.checked_mul(bpc)?.checked_mul(components)?;
    let stride = bits_per_row.div_ceil(8);
    h.checked_mul(stride)
}
/// Locates the end of an inline image whose outermost filter is ASCII85.
///
/// The outermost filter is the filter name stored under the abbreviated or
/// full filter key, or otherwise the first name of a filter array under one
/// of those keys. If it is not ASCII85, `None` is returned.
///
/// Otherwise `stream_data` is searched for the first `~>` marker that is
/// followed by optional white space, the bytes `EI`, and then either white
/// space or the end of the data. On success the result is
/// `(end of "~>", end of "EI")`, both as offsets into `stream_data`.
fn find_a85_inline_image_end(
    stream_data: &[u8],
    dict: &crate::object::dict::Dict<'_>,
) -> Option<(usize, usize)> {
    // A plain name entry takes precedence over array entries.
    let named_filter = dict
        .get::<Name>(F)
        .or_else(|| dict.get::<Name>(FILTER))
        .map(|n| n.as_ref().to_vec());
    let outermost: Option<Vec<u8>> = named_filter.or_else(|| {
        let first_of_short_key = dict
            .get::<Array>(F)
            .and_then(|a| a.iter::<Name>().next());
        let first = first_of_short_key.or_else(|| {
            dict.get::<Array>(FILTER)
                .and_then(|a| a.iter::<Name>().next())
        });
        first.map(|n| n.as_ref().to_vec())
    });

    match outermost.as_deref() {
        Some(ASCII85_DECODE_ABBREVIATION) | Some(b"ASCII85Decode") => {}
        _ => return None,
    }

    let is_space = |byte: u8| matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | 0x0C);

    for (pos, pair) in stream_data.windows(2).enumerate() {
        if pair != b"~>" {
            continue;
        }

        let eod_end = pos + 2;
        let rest = &stream_data[eod_end..];
        let gap = rest.iter().take_while(|&&byte| is_space(byte)).count();
        if !rest[gap..].starts_with(b"EI") {
            continue;
        }

        // `EI` only counts when it is a complete token.
        let ei_end = eod_end + gap + 2;
        let delimited = match stream_data.get(ei_end) {
            None => true,
            Some(&byte) => is_space(byte),
        };
        if delimited {
            return Some((eod_end, ei_end));
        }
    }

    None
}
impl Debug for Operator {
    /// Writes the string form of the wrapped name, as returned by its `as_str`.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let name = self.0.as_str();
        f.write_fmt(format_args!("{}", name))
    }
}
/// An operator of a content stream, stored as the name-like token that was
/// read from the stream.
#[derive(Clone, PartialEq)]
pub struct Operator(Name);
impl Deref for Operator {
    type Target = [u8];

    /// Gives access to the bytes of the wrapped name.
    fn deref(&self) -> &Self::Target {
        let bytes: &[u8] = self.0.as_ref();
        bytes
    }
}
impl Skippable for Operator {
    /// Skips one name-like token; fails if `skip_name_like` fails.
    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
        skip_name_like(r, false)?;
        Some(())
    }
}
impl Readable<'_> for Operator {
    /// Reads one name-like token and wraps its bytes in an `Operator`.
    ///
    /// Returns `None` if the token cannot be skipped, if its byte range cannot
    /// be obtained from the reader, or if the token is empty.
    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
        let begin = r.offset();
        skip_name_like(r, false)?;
        let finish = r.offset();

        match r.range(begin..finish)? {
            bytes if bytes.is_empty() => None,
            bytes => Some(Self(Name::new(bytes))),
        }
    }
}
/// An iterator over the instructions of a content stream that yields each
/// operator together with its still-untyped operands.
#[derive(Clone)]
pub struct UntypedIter<'a> {
// Reader positioned at the next unread byte of the content stream.
reader: Reader<'a>,
// Operands collected for the instruction that is currently being read.
stack: Stack<'a>,
}
impl<'a> UntypedIter<'a> {
    /// Creates an iterator over the instructions contained in `data`.
    pub fn new(data: &'a [u8]) -> Self {
        let reader = Reader::new(data);
        let stack = Stack::new();
        Self { reader, stack }
    }

    /// Creates an iterator over empty data.
    pub fn empty() -> Self {
        Self::new(&[])
    }
}
impl<'a> Iterator for UntypedIter<'a> {
type Item = Instruction<'a>;
/// Reads operands until an operator is encountered and returns both as one
/// instruction.
///
/// Returns `None` when the data is exhausted or when a token can be read
/// neither as an object nor as an operator. Inline images (`BI` operator) are
/// returned as a `BI` instruction whose operand is a stream holding the image
/// dictionary and the image data.
fn next(&mut self) -> Option<Self::Item> {
// Operands never carry over from one instruction to the next.
self.stack.clear();
self.reader.skip_white_spaces_and_comments();
while !self.reader.at_end() {
// These bytes can only start an operand (name, number, array, string or
// dictionary), never an operator.
if matches!(
self.reader.peek_byte()?,
b'/' | b'.' | b'+' | b'-' | b'0'..=b'9' | b'[' | b'<' | b'('
) {
if let Some(object) = self.reader.read_without_context::<Object<'_>>() {
self.stack.push(object);
} else if self.reader.read_without_context::<Operator>().is_some() {
// The token is not a valid object but can be consumed as a name-like
// token: drop it together with the operands collected so far and go on.
self.stack.clear();
} else {
return None;
}
} else {
let operator = match self.reader.read_without_context::<Operator>() {
Some(o) => o,
None => {
warn!("failed to read operator in content stream");
// Make sure that later calls see an exhausted reader.
self.reader.jump_to_end();
return None;
}
};
if operator.as_ref() == b"BI" {
// Inline image: a dictionary followed by the image data and `EI`.
// NOTE(review): reading the dictionary presumably also consumes the `ID`
// operator - confirm in `InlineImageDict`.
let inline_dict = self.reader.read_without_context::<InlineImageDict<'_>>()?;
let dict = inline_dict.get_dict().clone();
// White space separating the dictionary part from the image data.
self.reader.read_white_space()?;
// `stream_data` starts at the first byte of the image data;
// `start_offset` is the reader offset of that byte.
let stream_data = self.reader.tail()?;
let start_offset = self.reader.offset();
// Strategy 1: ASCII85 data ends with an explicit `~>` marker, so the end
// of the image can be located without guessing.
if let Some((image_end, advance)) =
find_a85_inline_image_end(stream_data, &dict)
{
// The image data includes the `~>` marker; the reader is moved past `EI`.
let image_data = &stream_data[..image_end];
let stream = Stream::new(image_data, dict.clone());
self.stack.push(Object::Stream(stream));
self.reader.read_bytes(advance)?;
self.reader.skip_white_spaces();
return Some(Instruction {
operands: core::mem::take(&mut self.stack),
operator,
});
}
// Strategy 2: for unfiltered images the data length follows from the
// image dimensions, provided that enough bytes are left.
if let Some(raw_size) = compute_raw_inline_image_size(&dict)
&& stream_data.len() >= raw_size
{
let image_data = &stream_data[..raw_size];
let stream = Stream::new(image_data, dict.clone());
self.stack.push(Object::Stream(stream));
self.reader.read_bytes(raw_size)?;
self.reader.skip_white_spaces();
// NOTE(review): these two bytes are assumed to be `EI` but are not
// checked; a failed read (end of data) is deliberately ignored.
let _ = self.reader.read_bytes(2);
self.reader.skip_white_spaces();
return Some(Instruction {
operands: core::mem::take(&mut self.stack),
operator,
});
}
// Strategy 3: scan for `EI`. The image data itself may contain the bytes
// `EI`, so every candidate is validated by looking at what follows it.
'outer: while let Some(bytes) = self.reader.peek_bytes(2) {
if bytes == b"EI" {
// Candidate end: the image data is everything read since `start_offset`.
let end_offset = self.reader.offset() - start_offset;
let image_data = &stream_data[..end_offset];
let stream = Stream::new(image_data, dict.clone());
// Everything after the candidate `EI`.
let tail = &self.reader.tail()?[2..];
let mut find_reader = Reader::new(tail);
while let Some(bytes) = find_reader.peek_bytes(2) {
if bytes == b"EI" {
// There is another `EI` further on, so the candidate may be part of the
// image data.
let analyze_data = &tail;
// Non-ASCII bytes after the candidate: treat the candidate as image data
// and keep scanning.
if analyze_data.iter().any(|c| !c.is_ascii()) {
self.reader.read_bytes(2)?;
continue 'outer;
}
// Otherwise interpret the remainder as a content stream. The candidate is
// accepted if a text-showing operator is found or if 20 instructions
// could be read.
let iter = TypedIter::new(tail);
let mut found = false;
for (counter, op) in iter.enumerate() {
if counter >= 20 {
found = true;
break;
}
if matches!(
op,
TypedInstruction::NextLineAndShowText(_)
| TypedInstruction::ShowText(_)
| TypedInstruction::ShowTexts(_)
| TypedInstruction::ShowTextWithParameters(_)
) {
found = true;
break;
}
}
if !found {
// The remainder does not look like a content stream, so the candidate is
// treated as image data.
self.reader.read_bytes(2)?;
continue 'outer;
}
} else if bytes == b"BI" {
// If another inline image starts here, the candidate `EI` is accepted as
// the end of the current one.
let mut cloned = find_reader.clone();
cloned.read_bytes(2)?;
if cloned
.read_without_context::<InlineImageDict<'_>>()
.is_some()
{
break;
}
}
find_reader.read_byte()?;
}
// The candidate was accepted: emit the stream and move past `EI`.
self.stack.push(Object::Stream(stream));
self.reader.read_bytes(2)?;
self.reader.skip_white_spaces();
break;
} else {
self.reader.read_byte()?;
}
}
// If no `EI` was found at all, the `BI` instruction below is returned
// without a stream operand.
}
return Some(Instruction {
operands: core::mem::take(&mut self.stack),
operator,
});
}
self.reader.skip_white_spaces_and_comments();
}
None
}
}
/// An iterator over the instructions of a content stream that converts each
/// untyped instruction into a [`TypedInstruction`].
#[derive(Clone)]
pub struct TypedIter<'a> {
// Source of the untyped instructions.
untyped: UntypedIter<'a>,
}
impl<'a> TypedIter<'a> {
    /// Creates a typed iterator over the instructions contained in `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Self::from_untyped(UntypedIter::new(data))
    }

    /// Wraps an existing untyped iterator.
    pub(crate) fn from_untyped(untyped: UntypedIter<'a>) -> Self {
        Self { untyped }
    }
}
impl<'a> Iterator for TypedIter<'a> {
    type Item = TypedInstruction<'a>;

    /// Yields the next instruction in typed form.
    ///
    /// If the instruction cannot be dispatched to a typed variant, the result
    /// depends on the operator: for the path operators `m`, `l`, `c`, `v`,
    /// `y`, `h` and `re` iteration stops (`None`), for every other operator a
    /// `Fallback` instruction carrying the operator is produced.
    fn next(&mut self) -> Option<Self::Item> {
        let instruction = self.untyped.next()?;

        if let Some(typed) = TypedInstruction::dispatch(&instruction) {
            return Some(typed);
        }

        let name: &[u8] = instruction.operator.0.deref();
        let is_path_operator = matches!(
            name,
            b"m" | b"l" | b"c" | b"v" | b"y" | b"h" | b"re"
        );

        if is_path_operator {
            None
        } else {
            Some(TypedInstruction::Fallback(instruction.operator.clone()))
        }
    }
}
/// A single content stream instruction: an operator and its operands.
pub struct Instruction<'a> {
/// The operands that preceded the operator.
pub operands: Stack<'a>,
/// The operator of the instruction.
pub operator: Operator,
}
impl<'a> Instruction<'a> {
pub fn operands(self) -> OperandIterator<'a> {
OperandIterator::new(self.operands)
}
}
/// The operands of an instruction, in the order in which they were read.
///
/// Up to `OPERANDS_THRESHOLD` operands fit into the inline storage of the
/// underlying `SmallVec`.
#[derive(Debug, Clone, PartialEq)]
pub struct Stack<'a>(SmallVec<[Object<'a>; OPERANDS_THRESHOLD]>);
impl<'a> Default for Stack<'a> {
fn default() -> Self {
Self::new()
}
}
impl<'a> Stack<'a> {
    /// Creates an empty stack.
    pub fn new() -> Self {
        Self(SmallVec::new())
    }

    /// Appends `operand` to the end of the stack.
    fn push(&mut self, operand: Object<'a>) {
        let Self(items) = self;
        items.push(operand);
    }

    /// Removes all operands.
    fn clear(&mut self) {
        let Self(items) = self;
        items.clear();
    }

    /// Returns the number of operands.
    fn len(&self) -> usize {
        let Self(items) = self;
        items.len()
    }

    /// Returns a copy of the operand at `index`, cast to `T`.
    ///
    /// Yields `None` if `index` is out of range or the cast fails.
    fn get<T>(&self, index: usize) -> Option<T>
    where
        T: ObjectLike<'a>,
    {
        let operand = self.0.get(index)?;
        operand.clone().cast::<T>()
    }

    /// Returns copies of all operands, each cast to `T`.
    ///
    /// Yields `None` as soon as one of the casts fails.
    fn get_all<T>(&self) -> Option<SmallVec<[T; OPERANDS_THRESHOLD]>>
    where
        T: ObjectLike<'a>,
    {
        self.0
            .iter()
            .map(|operand| operand.clone().cast::<T>())
            .collect()
    }
}
/// An iterator over the operands of an instruction.
pub struct OperandIterator<'a> {
// The operands that are iterated over.
stack: Stack<'a>,
// Index of the operand that is returned next.
cur_index: usize,
}
impl<'a> OperandIterator<'a> {
    /// Creates an iterator that starts at the first operand of `stack`.
    fn new(stack: Stack<'a>) -> Self {
        let cur_index = 0;
        Self { stack, cur_index }
    }
}
impl<'a> Iterator for OperandIterator<'a> {
    type Item = Object<'a>;

    /// Returns a copy of the next operand, or `None` once all operands have
    /// been returned.
    fn next(&mut self) -> Option<Self::Item> {
        let operand = self.stack.get::<Object<'a>>(self.cur_index)?;
        self.cur_index += 1;
        Some(operand)
    }
}
/// Implemented by the typed operator structs.
///
/// Every implementor can be converted into a [`TypedInstruction`] and
/// (fallibly) back.
pub(crate) trait OperatorTrait<'a>
where
Self: Sized + Into<TypedInstruction<'a>> + TryFrom<TypedInstruction<'a>>,
{
/// The operator as it is written in a content stream.
const OPERATOR: &'static str;
/// Builds the typed operator from the operands in `stack`, or returns `None`
/// if the operands cannot be converted.
fn from_stack(stack: &Stack<'a>) -> Option<Self>;
}
// Helper macros that generate the boilerplate for typed operator structs.
mod macros {
// Generates, for the operator struct `$t` (optionally with lifetime
// parameters):
// - an `OperatorTrait` impl with `OPERATOR = $e` whose `from_stack` evaluates
//   `$body(stack)`,
// - `From<$t> for TypedInstruction`, wrapping the value in the variant that
//   has the same name as the struct,
// - `TryFrom<TypedInstruction> for $t`, succeeding only for that variant.
//
// `$n` is the expected number of operands. A mismatch is only logged and the
// conversion is attempted anyway; passing `u8::MAX as usize` turns the check
// off.
macro_rules! op_impl {
($t:ident $(<$l:lifetime>),*, $e:expr, $n:expr, $body:expr) => {
impl<'a> OperatorTrait<'a> for $t$(<$l>),* {
const OPERATOR: &'static str = $e;
fn from_stack(stack: &Stack<'a>) -> Option<Self> {
if $n != u8::MAX as usize {
if stack.len() != $n {
warn!("wrong stack length {} for operator {}, expected {}", stack.len(), Self::OPERATOR, $n);
}
}
$body(stack).or_else(|| {
warn!("failed to convert operands for operator {}", Self::OPERATOR);
None
})
}
}
impl<'a> From<$t$(<$l>),*> for TypedInstruction<'a> {
fn from(value: $t$(<$l>),*) -> Self {
TypedInstruction::$t(value)
}
}
impl<'a> TryFrom<TypedInstruction<'a>> for $t$(<$l>),* {
type Error = ();
fn try_from(value: TypedInstruction<'a>) -> core::result::Result<Self, Self::Error> {
match value {
TypedInstruction::$t(e) => Ok(e),
_ => Err(())
}
}
}
};
}
// Operator without operands: the conversion always succeeds.
macro_rules! op0 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 0, |_| Some(Self));
}
}
// Operator with one operand. `shift` selects the last operand, so surplus
// operands at the front of the stack are ignored.
macro_rules! op1 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 1, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(1);
Some(Self(stack.get(0 + shift)?))
});
}
}
// Operator that takes all operands on the stack, converted to one common
// type; the operand count is not checked.
macro_rules! op_all {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, u8::MAX as usize, |stack: &Stack<'a>|
Some(Self(stack.get_all()?)));
}
}
// Operator with two operands, taken from the end of the stack.
macro_rules! op2 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 2, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(2);
Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?))
});
}
}
// Operator with three operands, taken from the end of the stack.
macro_rules! op3 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 3, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(3);
Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
stack.get(2 + shift)?))
});
}
}
// Operator with four operands, taken from the end of the stack.
macro_rules! op4 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 4, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(4);
Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
stack.get(2 + shift)?, stack.get(3 + shift)?))
});
}
}
// Operator with six operands, taken from the end of the stack.
macro_rules! op6 {
($t:ident $(<$l:lifetime>),*, $e:expr) => {
crate::content::macros::op_impl!($t$(<$l>),*, $e, 6, |stack: &Stack<'a>| {
let shift = stack.len().saturating_sub(6);
Some(Self(stack.get(0 + shift)?, stack.get(1 + shift)?,
stack.get(2 + shift)?, stack.get(3 + shift)?,
stack.get(4 + shift)?, stack.get(5 + shift)?))
});
}
}
// Make the macros available to the rest of the crate.
pub(crate) use op_all;
pub(crate) use op_impl;
pub(crate) use op0;
pub(crate) use op1;
pub(crate) use op2;
pub(crate) use op3;
pub(crate) use op4;
pub(crate) use op6;
}