use crate::crypto::DecryptionTarget;
use crate::filter::Filter;
use crate::object;
use crate::object::Dict;
use crate::object::Name;
use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
use crate::object::{Array, ObjectIdentifier};
use crate::object::{Object, ObjectLike};
use crate::reader::Reader;
use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
use crate::sync::Arc;
use crate::util::OptionLog;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use core::fmt::{Debug, Formatter};
use log::warn;
use smallvec::SmallVec;
/// Shared state behind a `Stream` handle.
#[derive(Clone)]
struct StreamInner<'a> {
/// The stream dictionary.
dict: Dict<'a>,
/// Filters collected from the dictionary by `Stream::new`, in the order `decoded_image` applies them.
filters: SmallVec<[Filter; 2]>,
/// Decode-parameter dictionaries; `Stream::new` pushes exactly one per entry of `filters`.
filter_params: SmallVec<[Dict<'a>; 2]>,
/// The stream payload as sliced out of the input, before decryption and filter decoding.
data: &'a [u8],
/// Maximum decoded size accepted by `decoded_image`; `u64::MAX` when the reader context sets no limit.
stream_byte_limit: u64,
}
/// A stream object: a dictionary plus the payload bytes that follow it.
///
/// The state lives behind an `Arc`, so cloning a `Stream` only clones the handle.
#[derive(Clone)]
pub struct Stream<'a>(Arc<StreamInner<'a>>);
impl PartialEq for Stream<'_> {
    /// Two streams are equal when both their dictionaries and their raw payloads are equal.
    ///
    /// The filter lists and the byte limit do not take part in the comparison.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (&*self.0, &*other.0);
        lhs.dict == rhs.dict && lhs.data == rhs.data
    }
}
/// Image-related hints handed to the stream filters while decoding.
///
/// The `Default` value (all `false` / `None` / zero) is what `Stream::decoded` passes for
/// non-image streams.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ImageDecodeParams {
    /// Whether the image uses an indexed color space (presumably; confirm against the filters
    /// that consume this flag).
    pub is_indexed: bool,
    /// Bits per component, if known (assumed meaning of `bpc`; confirm with the filters).
    pub bpc: Option<u8>,
    /// Number of color components, if known.
    pub num_components: Option<u8>,
    /// Optional target dimensions as a pair of `u32`s (presumably `(width, height)` of the
    /// size the caller wants to render at; TODO confirm ordering and semantics).
    pub target_dimension: Option<(u32, u32)>,
    /// Image width. Together with `height` it feeds the pixel-count limit in
    /// `Stream::decoded_image`; a value of `0` skips that check.
    pub width: u32,
    /// Image height. See `width`.
    pub height: u32,
}
impl<'a> Stream<'a> {
    /// Builds a stream from its raw payload and dictionary, resolving the filter chain up front.
    ///
    /// The filter entry is looked up under `F` first and `FILTER` second and may be a single
    /// name or an array of names. Decode parameters are looked up the same way under `DP` /
    /// `DECODE_PARMS`; a filter without a usable parameter dictionary gets `Dict::default()`.
    /// Names that `Filter::from_name` does not recognise are left out of the chain.
    ///
    /// `stream_byte_limit` is the largest decoded size `decoded_image` will accept;
    /// `u64::MAX` can never be exceeded and therefore acts as "no limit".
    pub(crate) fn new(data: &'a [u8], dict: Dict<'a>, stream_byte_limit: u64) -> Self {
        let mut collected_filters = SmallVec::new();
        let mut collected_params = SmallVec::new();

        if let Some(filter) = dict
            .get::<Name>(F)
            .or_else(|| dict.get::<Name>(FILTER))
            .and_then(Filter::from_name)
        {
            // Single filter: the parameters, if any, are a single dictionary.
            let params = dict
                .get::<Dict<'_>>(DP)
                .or_else(|| dict.get::<Dict<'_>>(DECODE_PARMS))
                .unwrap_or_default();

            collected_filters.push(filter);
            collected_params.push(params);
        } else if let Some(filters) = dict
            .get::<Array<'_>>(F)
            .or_else(|| dict.get::<Array<'_>>(FILTER))
        {
            // Filter chain: the parameters, if any, are a parallel array.
            let filters = filters.iter::<Name>().map(Filter::from_name);
            let mut params = dict
                .get::<Array<'_>>(DP)
                .or_else(|| dict.get::<Array<'_>>(DECODE_PARMS))
                .map(|a| a.iter::<Object<'_>>());

            for filter in filters {
                // Advance the parameter iterator for every filter name, recognised or not,
                // so that later filters stay paired with their own parameters.
                let params = params
                    .as_mut()
                    .and_then(|p| p.next())
                    .and_then(|p| p.into_dict())
                    .unwrap_or_default();

                if let Some(filter) = filter {
                    collected_filters.push(filter);
                    collected_params.push(params);
                }
            }
        }

        Self(Arc::new(StreamInner {
            dict,
            filters: collected_filters,
            filter_params: collected_params,
            data,
            stream_byte_limit,
        }))
    }

    /// Returns the stream payload with decryption applied but no filters.
    ///
    /// The bytes are borrowed unchanged when the document needs no decryption, when the
    /// `TYPE` lookup below yields `XRef`, or when the dictionary has no object identifier.
    /// Otherwise the decrypted bytes are returned as an owned buffer; a failed decryption
    /// yields an empty buffer.
    pub fn raw_data(&self) -> Cow<'a, [u8]> {
        let ctx = self.0.dict.ctx();

        // NOTE(review): `/Type` values are name objects in PDF. If `get::<object::String>`
        // returns `None` for a name, this guard never observes `XRef` and always evaluates
        // to `true` - confirm whether the lookup should use `Name` instead.
        let should_decrypt = ctx.xref().needs_decryption(ctx)
            && self
                .0
                .dict
                .get::<object::String>(TYPE)
                .map(|t| t.as_ref() != b"XRef")
                .unwrap_or(true);

        if !should_decrypt {
            return Cow::Borrowed(self.0.data);
        }

        match self.0.dict.obj_id() {
            Some(obj_id) => Cow::Owned(
                ctx.xref()
                    .decrypt(obj_id, self.0.data, DecryptionTarget::Stream)
                    .unwrap_or_default(),
            ),
            None => Cow::Borrowed(self.0.data),
        }
    }

    /// Returns the stream dictionary.
    pub fn dict(&self) -> &Dict<'a> {
        &self.0.dict
    }

    /// Returns the object identifier recorded on the stream dictionary, if any.
    pub fn obj_id(&self) -> Option<ObjectIdentifier> {
        self.0.dict.obj_id()
    }

    /// Returns the recognised filters of this stream in application order.
    pub fn filters(&self) -> &[Filter] {
        &self.0.filters
    }

    /// Decodes the stream with default image parameters and returns only the bytes.
    ///
    /// # Errors
    ///
    /// Same as [`Stream::decoded_image`].
    pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
        self.decoded_image(&ImageDecodeParams::default())
            .map(|r| r.data)
    }

    /// Decrypts the payload, runs it through every filter in order and returns the result.
    ///
    /// A stream without filters yields its raw (decrypted) data with `image_data: None`.
    ///
    /// # Errors
    ///
    /// * `DecodeFailure::ImageDecode` if the load limits define an image pixel limit, both
    ///   `image_params.width` and `image_params.height` are non-zero, and their product
    ///   exceeds that limit. This is checked before any decoding work is done.
    /// * Whatever error a filter's `apply` returns.
    /// * `DecodeFailure::StreamTooLarge` if the final output is longer than the stream byte
    ///   limit. The check runs once, after all filters have finished.
    pub fn decoded_image(
        &self,
        image_params: &ImageDecodeParams,
    ) -> Result<FilterResult, DecodeFailure> {
        if let Some(limit) = self.0.dict.ctx().load_limits().image_pixel_limit()
            && image_params.width > 0
            && image_params.height > 0
        {
            let pixels =
                u64::from(image_params.width).saturating_mul(u64::from(image_params.height));

            if pixels > u64::from(limit) {
                warn!("image pixel count {pixels} exceeds limit {limit}, stopping image decode");
                return Err(DecodeFailure::ImageDecode);
            }
        }

        let data = self.raw_data();
        let mut current: Option<FilterResult> = None;

        for (filter, params) in self.0.filters.iter().zip(self.0.filter_params.iter()) {
            // The first filter reads the raw data, every later one the previous output.
            let input: &[u8] = match &current {
                Some(previous) => previous.data.as_slice(),
                None => &*data,
            };
            let next = filter.apply(input, params.clone(), image_params)?;
            current = Some(next);
        }

        // Build the no-filter fallback only when it is needed. The previous eager form copied
        // the whole raw payload on every call, even when a filter result was available, and
        // copied decrypted data a second time.
        let result = match current {
            Some(decoded) => decoded,
            None => FilterResult {
                data: data.into_owned(),
                image_data: None,
            },
        };

        // A limit of `u64::MAX` can never be exceeded, so no special case is required.
        let limit = self.0.stream_byte_limit;
        let observed = result.data.len() as u64;
        if observed > limit {
            warn!("decoded stream size {observed} exceeds limit {limit}, stopping decode");
            return Err(DecodeFailure::StreamTooLarge { observed, limit });
        }

        Ok(result)
    }
}
impl Debug for Stream<'_> {
    /// Prints only the raw payload length rather than the payload itself.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let byte_len = self.0.data.len();
        f.write_fmt(format_args!("Stream (len: {:?})", byte_len))
    }
}
impl Skippable for Stream<'_> {
    /// Skipping a stream is not supported: this logs a warning and always reports failure.
    fn skip(_reader: &mut Reader<'_>, _flag: bool) -> Option<()> {
        warn!("attempted to skip a stream object");

        None
    }
}
impl<'a> Readable<'a> for Stream<'a> {
    /// Reads a stream: first its dictionary, then the payload.
    ///
    /// Dictionaries carrying an `F` key are rejected, because the stream data would live in
    /// an external file. The payload is first parsed using the declared length; if that
    /// fails the reader is rewound to just after the dictionary and the payload is located
    /// by scanning for the `endstream` keyword instead.
    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
        let dict = r.read_with_context::<Dict<'_>>(ctx)?;

        if dict.contains_key(F) {
            warn!("encountered stream referencing external file, which is unsupported");
            return None;
        }

        let stream_byte_limit = ctx.load_limits().stream_byte_limit().unwrap_or(u64::MAX);
        let after_dict = r.offset();

        let parsed = match parse_proper(r, &dict, stream_byte_limit) {
            Some(stream) => Some(stream),
            None => {
                warn!("failed to parse stream, trying to parse it manually");
                r.jump(after_dict);
                parse_fallback(r, &dict, stream_byte_limit)
            }
        };

        parsed.error_none("was unable to manually parse the stream")
    }
}
/// Reasons why decoding a stream can fail.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DecodeFailure {
    /// Image decoding failed. `Stream::decoded_image` also returns this when the image's
    /// pixel count exceeds the configured pixel limit.
    ImageDecode,
    /// A stream filter failed to decode its input (presumably; produced by the filters).
    StreamDecode,
    /// Decryption failed (presumably; not produced in this module).
    Decryption,
    /// Any failure without a more specific category.
    Unknown,
    /// The fully decoded stream is larger than the configured stream byte limit.
    StreamTooLarge {
        /// Size in bytes of the decoded data.
        observed: u64,
        /// The limit that was exceeded, in bytes.
        limit: u64,
    },
}
/// Color space reported for a decoded image.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ImageColorSpace {
    /// Grayscale.
    Gray,
    /// RGB.
    Rgb,
    /// RGB obtained from YCbCr data (per the variant name; the conversion happens elsewhere).
    RgbFromYCbCr,
    /// CMYK.
    Cmyk,
    /// A color space this crate does not classify. The payload is presumably the number of
    /// components - TODO confirm against the decoders that construct it.
    Unknown(u8),
}
/// Image metadata that a filter may attach to its decoded output (see `FilterResult::image_data`).
pub struct ImageData {
/// Optional alpha bytes (presumably a separate alpha channel produced by the image decoder; TODO confirm layout).
pub alpha: Option<Vec<u8>>,
/// Color space of the decoded image, if the decoder reported one.
pub color_space: Option<ImageColorSpace>,
/// Bits per color component of the decoded image.
pub bits_per_component: u8,
/// Width of the decoded image (presumably in pixels).
pub width: u32,
/// Height of the decoded image (presumably in pixels).
pub height: u32,
}
/// Output of applying a filter, and of `Stream::decoded_image` as a whole.
pub struct FilterResult {
/// The decoded bytes.
pub data: Vec<u8>,
/// Image metadata, when present; `None` for results built by `from_data` or for streams without filters.
pub image_data: Option<ImageData>,
}
impl FilterResult {
    /// Wraps plain decoded bytes in a result that carries no image metadata.
    pub(crate) fn from_data(data: Vec<u8>) -> Self {
        let image_data = None;

        Self { data, image_data }
    }
}
/// Parses a stream body by trusting the `LENGTH` entry of its dictionary.
///
/// Expects, in order: optional white space/comments, the `stream` keyword, optional spaces
/// or tabs, one end-of-line marker (`\n`, `\r\n` or `\r`), exactly `LENGTH` payload bytes,
/// optional white space and the `endstream` keyword. Returns `None` as soon as any of these
/// is missing, including when `LENGTH` cannot be read as a `u32`.
fn parse_proper<'a>(
    r: &mut Reader<'a>,
    dict: &Dict<'a>,
    stream_byte_limit: u64,
) -> Option<Stream<'a>> {
    let declared_len = dict.get::<u32>(LENGTH)? as usize;

    r.skip_white_spaces_and_comments();
    r.forward_tag(b"stream")?;

    // Tolerate trailing blanks between the keyword and the line break.
    while matches!(r.peek_byte(), Some(b' ' | b'\t')) {
        r.forward();
    }

    // Try `\n`, then `\r\n`, then a lone `\r`; give up if none of them is present.
    if r.forward_tag(b"\n").is_none() && r.forward_tag(b"\r\n").is_none() {
        r.forward_tag(b"\r")?;
    }

    let payload = r.read_bytes(declared_len)?;

    r.skip_white_spaces();
    r.forward_tag(b"endstream")?;

    Some(Stream::new(payload, dict.clone(), stream_byte_limit))
}
/// Parses a stream body without relying on the dictionary's length entry.
///
/// The reader is advanced byte by byte until the `stream` keyword is found. After the
/// keyword, optional spaces/tabs and one end-of-line marker, the payload is taken to end at
/// the first position that is either directly at `endstream` or at a run of white space
/// that is immediately followed by `endstream`. Returns `None` if the input ends first.
fn parse_fallback<'a>(
    r: &mut Reader<'a>,
    dict: &Dict<'a>,
    stream_byte_limit: u64,
) -> Option<Stream<'a>> {
    // Scan forward to (and past) the `stream` keyword.
    loop {
        if r.forward_tag(b"stream").is_some() {
            break;
        }
        r.read_byte()?;
    }

    while matches!(r.peek_byte(), Some(b' ' | b'\t')) {
        r.forward();
    }

    if r.forward_tag(b"\n").is_none() && r.forward_tag(b"\r\n").is_none() {
        r.forward_tag(b"\r")?;
    }

    let payload = r.tail()?;
    let payload_offset = r.offset();

    loop {
        let at_candidate_end =
            r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some();

        if !at_candidate_end {
            r.read_byte()?;
            continue;
        }

        // Remember the payload up to here before consuming the white space.
        let consumed = r.offset() - payload_offset;
        let data = payload.get(..consumed)?;

        r.skip_white_spaces();

        // If `endstream` does not follow, the white space was part of the payload and the
        // search simply continues from the current position.
        if r.forward_tag(b"endstream").is_some() {
            return Some(Stream::new(data, dict.clone(), stream_byte_limit));
        }
    }
}
impl<'a> TryFrom<Object<'a>> for Stream<'a> {
    type Error = ();

    /// Extracts the stream from an `Object::Stream`; every other object kind is an error.
    fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
        if let Object::Stream(stream) = value {
            Ok(stream)
        } else {
            Err(())
        }
    }
}
/// Marks `Stream` as an `ObjectLike` type; no trait items are provided in this impl.
impl<'a> ObjectLike<'a> for Stream<'a> {}
#[cfg(test)]
mod tests {
    use super::DecodeFailure;
    use crate::object::Stream;
    use crate::pdf::PdfLoadLimits;
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    /// A zlib stream that inflates to 50 zero bytes (see `flate_decode_within_limit`).
    const FIFTY_ZEROS_DEFLATED: &[u8] = &[
        0x78, 0x9c, 0x63, 0x60, 0x20, 0x15, 0x00, 0x00, 0x00, 0x32, 0x00, 0x01,
    ];

    /// A stream with a correct length entry is parsed to exactly its payload.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let ctx = ReaderContext::dummy();
        let mut reader = Reader::new(input);

        let stream = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        assert_eq!(stream.0.data, b"abcdefghij");
    }

    /// Without limits, an unfiltered stream decodes to its raw payload.
    #[test]
    fn decoded_no_limit() {
        let input = b"<< /Length 5 >> stream\nhello\nendstream";
        let ctx = ReaderContext::dummy();
        let mut reader = Reader::new(input);

        let stream = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        assert_eq!(stream.decoded().unwrap(), b"hello");
    }

    /// An unfiltered payload longer than the limit is rejected with both sizes reported.
    #[test]
    fn decoded_exceeds_byte_limit() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let ctx = ReaderContext::dummy_with_limits(PdfLoadLimits::new().max_stream_bytes(5));
        let mut reader = Reader::new(input);

        let stream = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        match stream.decoded() {
            Err(DecodeFailure::StreamTooLarge { observed, limit }) => {
                assert_eq!(observed, 10);
                assert_eq!(limit, 5);
            }
            other => panic!("expected StreamTooLarge, got {other:?}"),
        }
    }

    /// A payload exactly as long as the limit is still accepted.
    #[test]
    fn decoded_at_byte_limit_succeeds() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let ctx = ReaderContext::dummy_with_limits(PdfLoadLimits::new().max_stream_bytes(10));
        let mut reader = Reader::new(input);

        let stream = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        assert_eq!(stream.decoded().unwrap(), b"abcdefghij");
    }

    /// The limit applies to the inflated size, not to the compressed size.
    #[test]
    fn flate_decode_exceeds_limit() {
        let length = FIFTY_ZEROS_DEFLATED.len();
        let mut pdf = format!("<< /Length {length} /Filter /FlateDecode >> stream\n").into_bytes();
        pdf.extend_from_slice(FIFTY_ZEROS_DEFLATED);
        pdf.extend_from_slice(b"\nendstream");

        let ctx = ReaderContext::dummy_with_limits(PdfLoadLimits::new().max_stream_bytes(20));
        let mut reader = Reader::new(&pdf);
        let stream = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        let outcome = stream.decoded();
        assert!(
            matches!(outcome, Err(DecodeFailure::StreamTooLarge { .. })),
            "expected StreamTooLarge, got {outcome:?}"
        );
    }

    /// Inflated output of exactly the limit is returned intact.
    #[test]
    fn flate_decode_within_limit() {
        let length = FIFTY_ZEROS_DEFLATED.len();
        let mut pdf = format!("<< /Length {length} /Filter /FlateDecode >> stream\n").into_bytes();
        pdf.extend_from_slice(FIFTY_ZEROS_DEFLATED);
        pdf.extend_from_slice(b"\nendstream");

        let ctx = ReaderContext::dummy_with_limits(PdfLoadLimits::new().max_stream_bytes(50));
        let mut reader = Reader::new(&pdf);
        let stream = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        let decoded = stream.decoded().unwrap();
        assert_eq!(decoded.len(), 50);
        assert!(decoded.iter().all(|&b| b == 0));
    }

    /// Inflated output one byte over the limit is rejected.
    #[test]
    fn flate_decode_limit_one_below() {
        let length = FIFTY_ZEROS_DEFLATED.len();
        let mut pdf = format!("<< /Length {length} /Filter /FlateDecode >> stream\n").into_bytes();
        pdf.extend_from_slice(FIFTY_ZEROS_DEFLATED);
        pdf.extend_from_slice(b"\nendstream");

        let ctx = ReaderContext::dummy_with_limits(PdfLoadLimits::new().max_stream_bytes(49));
        let mut reader = Reader::new(&pdf);
        let stream = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        let outcome = stream.decoded();
        assert!(
            matches!(outcome, Err(DecodeFailure::StreamTooLarge { .. })),
            "expected StreamTooLarge, got {outcome:?}"
        );
    }
}