1use crate::crypto::DecryptionTarget;
4use crate::filter::Filter;
5use crate::object;
6use crate::object::Dict;
7use crate::object::Name;
8use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
9use crate::object::{Array, ObjectIdentifier};
10use crate::object::{Object, ObjectLike};
11use crate::reader::Reader;
12use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
13use crate::sync::Arc;
14use crate::util::OptionLog;
15use alloc::borrow::Cow;
16use alloc::vec::Vec;
17use core::fmt::{Debug, Formatter};
18use log::warn;
19use smallvec::SmallVec;
20
/// Shared state behind a [`Stream`] handle.
#[derive(Clone)]
struct StreamInner<'a> {
    /// The dictionary that accompanies the stream data.
    dict: Dict<'a>,
    /// Filters resolved from `dict` by `Stream::new`, in the order they are applied.
    filters: SmallVec<[Filter; 2]>,
    /// Decode parameters for the filters; `Stream::new` pushes exactly one entry per
    /// entry in `filters`, so both lists are index-aligned.
    filter_params: SmallVec<[Dict<'a>; 2]>,
    /// The stream bytes exactly as handed to `Stream::new` (no decryption or filters
    /// applied).
    data: &'a [u8],
    /// Largest decoded size accepted by `Stream::decoded_image`; `u64::MAX` disables
    /// the check.
    stream_byte_limit: u64,
}
32
/// A stream object: a dictionary plus a blob of (possibly filtered) bytes.
///
/// The state is stored behind an `Arc`, so cloning a `Stream` clones the handle rather
/// than the underlying data.
#[derive(Clone)]
pub struct Stream<'a>(Arc<StreamInner<'a>>);
36
37impl PartialEq for Stream<'_> {
38 fn eq(&self, other: &Self) -> bool {
39 self.0.dict == other.0.dict && self.0.data == other.0.data
40 }
41}
42
/// Additional parameters handed to the filters when a stream is decoded as an image.
///
/// The `Default` value (all flags `false`, all options `None`, zero dimensions) is what
/// `Stream::decoded` uses for non-image streams.
#[derive(Clone, PartialEq, Default)]
pub struct ImageDecodeParams {
    /// Presumably whether the image uses an indexed color space — confirm against the
    /// filter implementations.
    pub is_indexed: bool,
    /// Presumably the bits per component of the image, if known — confirm.
    pub bpc: Option<u8>,
    /// Presumably the number of color components of the image, if known — confirm.
    pub num_components: Option<u8>,
    /// Presumably a `(width, height)` hint for the desired output size — confirm against
    /// the filter implementations.
    pub target_dimension: Option<(u32, u32)>,
    /// Width of the image. `Stream::decoded_image` multiplies it with `height` for its
    /// pixel-limit check and skips that check when either value is `0`.
    pub width: u32,
    /// Height of the image; see `width`.
    pub height: u32,
}
61
62impl<'a> Stream<'a> {
63 pub(crate) fn new(data: &'a [u8], dict: Dict<'a>, stream_byte_limit: u64) -> Self {
64 let mut collected_filters = SmallVec::new();
65 let mut collected_params = SmallVec::new();
66
67 if let Some(filter) = dict
68 .get::<Name>(F)
69 .or_else(|| dict.get::<Name>(FILTER))
70 .and_then(Filter::from_name)
71 {
72 let params = dict
73 .get::<Dict<'_>>(DP)
74 .or_else(|| dict.get::<Dict<'_>>(DECODE_PARMS))
75 .unwrap_or_default();
76
77 collected_filters.push(filter);
78 collected_params.push(params);
79 } else if let Some(filters) = dict
80 .get::<Array<'_>>(F)
81 .or_else(|| dict.get::<Array<'_>>(FILTER))
82 {
83 let filters = filters.iter::<Name>().map(Filter::from_name);
84 let mut params = dict
85 .get::<Array<'_>>(DP)
86 .or_else(|| dict.get::<Array<'_>>(DECODE_PARMS))
87 .map(|a| a.iter::<Object<'_>>());
88
89 for filter in filters {
90 let params = params
91 .as_mut()
92 .and_then(|p| p.next())
93 .and_then(|p| p.into_dict())
94 .unwrap_or_default();
95
96 if let Some(filter) = filter {
97 collected_filters.push(filter);
98 collected_params.push(params);
99 }
100 }
101 }
102
103 Self(Arc::new(StreamInner {
104 dict,
105 filters: collected_filters,
106 filter_params: collected_params,
107 data,
108 stream_byte_limit,
109 }))
110 }
111
112 pub fn raw_data(&self) -> Cow<'a, [u8]> {
116 let ctx = self.0.dict.ctx();
117
118 if ctx.xref().needs_decryption(ctx)
119 && self
120 .0
121 .dict
122 .get::<object::String>(TYPE)
123 .map(|t| t.as_ref() != b"XRef")
124 .unwrap_or(true)
125 {
126 if let Some(obj_id) = self.0.dict.obj_id() {
129 Cow::Owned(
130 ctx.xref()
131 .decrypt(obj_id, self.0.data, DecryptionTarget::Stream)
132 .unwrap_or_default(),
133 )
134 } else {
135 Cow::Borrowed(self.0.data)
136 }
137 } else {
138 Cow::Borrowed(self.0.data)
139 }
140 }
141
142 pub fn dict(&self) -> &Dict<'a> {
144 &self.0.dict
145 }
146
147 pub fn obj_id(&self) -> Option<ObjectIdentifier> {
151 self.0.dict.obj_id()
152 }
153
154 pub fn filters(&self) -> &[Filter] {
156 &self.0.filters
157 }
158
159 pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
164 self.decoded_image(&ImageDecodeParams::default())
165 .map(|r| r.data)
166 }
167
168 pub fn decoded_image(
171 &self,
172 image_params: &ImageDecodeParams,
173 ) -> Result<FilterResult, DecodeFailure> {
174 if let Some(limit) = self.0.dict.ctx().load_limits().image_pixel_limit()
175 && image_params.width > 0
176 && image_params.height > 0
177 {
178 let pixels =
179 u64::from(image_params.width).saturating_mul(u64::from(image_params.height));
180 if pixels > u64::from(limit) {
181 warn!("image pixel count {pixels} exceeds limit {limit}, stopping image decode");
182 return Err(DecodeFailure::ImageDecode);
183 }
184 }
185
186 let data = self.raw_data();
187
188 let mut current: Option<FilterResult> = None;
189
190 for (filter, params) in self.0.filters.iter().zip(self.0.filter_params.iter()) {
191 let new = filter.apply(
192 current.as_ref().map(|c| c.data.as_ref()).unwrap_or(&data),
193 params.clone(),
194 image_params,
195 )?;
196 current = Some(new);
197 }
198
199 let result = current.unwrap_or(FilterResult {
200 data: data.to_vec(),
201 image_data: None,
202 });
203
204 let limit = self.0.stream_byte_limit;
205 if limit != u64::MAX {
206 let observed = result.data.len() as u64;
207 if observed > limit {
208 warn!("decoded stream size {observed} exceeds limit {limit}, stopping decode");
209 return Err(DecodeFailure::StreamTooLarge { observed, limit });
210 }
211 }
212
213 Ok(result)
214 }
215}
216
217impl Debug for Stream<'_> {
218 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
219 write!(f, "Stream (len: {:?})", self.0.data.len())
220 }
221}
222
223impl Skippable for Stream<'_> {
224 fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
225 warn!("attempted to skip a stream object");
227
228 None
229 }
230}
231
232impl<'a> Readable<'a> for Stream<'a> {
233 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
234 let dict = r.read_with_context::<Dict<'_>>(ctx)?;
235
236 if dict.contains_key(F) {
237 warn!("encountered stream referencing external file, which is unsupported");
238
239 return None;
240 }
241
242 let stream_byte_limit = ctx.load_limits().stream_byte_limit().unwrap_or(u64::MAX);
243 let offset = r.offset();
244 parse_proper(r, &dict, stream_byte_limit)
245 .or_else(|| {
246 warn!("failed to parse stream, trying to parse it manually");
247
248 r.jump(offset);
249 parse_fallback(r, &dict, stream_byte_limit)
250 })
251 .error_none("was unable to manually parse the stream")
252 }
253}
254
/// The reasons why decoding a stream can fail.
#[derive(Debug, Copy, Clone)]
pub enum DecodeFailure {
    /// An image could not be decoded. Also returned by `Stream::decoded_image` when the
    /// image's pixel count exceeds the configured pixel limit.
    ImageDecode,
    /// Presumably a failure inside a (non-image) stream filter — not produced in this
    /// file; confirm against the filter implementations.
    StreamDecode,
    /// Presumably a failure while decrypting the stream — not produced in this file;
    /// confirm.
    Decryption,
    /// A failure with no more specific classification.
    Unknown,
    /// The decoded stream is larger than the configured stream byte limit.
    StreamTooLarge {
        /// The size in bytes of the decoded data.
        observed: u64,
        /// The limit in bytes that was exceeded.
        limit: u64,
    },
}
279
/// The color space reported for decoded image data.
#[derive(Debug, Copy, Clone)]
pub enum ImageColorSpace {
    /// Grayscale.
    Gray,
    /// RGB.
    Rgb,
    /// Presumably RGB data that was converted from YCbCr during decoding — confirm
    /// against the image filters.
    RgbFromYCbCr,
    /// CMYK.
    Cmyk,
    /// A color space not covered by the other variants.
    /// NOTE(review): the meaning of the `u8` payload is not visible here — confirm.
    Unknown(u8),
}
302
/// Image-specific metadata that a filter can attach to its decoded output.
pub struct ImageData {
    /// Presumably a separately decoded alpha channel, if the image has one — confirm.
    pub alpha: Option<Vec<u8>>,
    /// The color space of the decoded data, if known.
    pub color_space: Option<ImageColorSpace>,
    /// The bits per component of the decoded data.
    pub bits_per_component: u8,
    /// The width of the decoded image.
    pub width: u32,
    /// The height of the decoded image.
    pub height: u32,
}
316
/// The output of applying a filter (or a whole filter chain) to stream data.
pub struct FilterResult {
    /// The decoded bytes.
    pub data: Vec<u8>,
    /// Image metadata, if any was produced; `None` for results built by
    /// `FilterResult::from_data` and for unfiltered streams.
    pub image_data: Option<ImageData>,
}
324
325impl FilterResult {
326 pub(crate) fn from_data(data: Vec<u8>) -> Self {
327 Self {
328 data,
329 image_data: None,
330 }
331 }
332}
333
334fn parse_proper<'a>(
335 r: &mut Reader<'a>,
336 dict: &Dict<'a>,
337 stream_byte_limit: u64,
338) -> Option<Stream<'a>> {
339 let length = dict.get::<u32>(LENGTH)?;
340
341 r.skip_white_spaces_and_comments();
342 r.forward_tag(b"stream")?;
343 while r.peek_byte().is_some_and(|b| b == b' ' || b == b'\t') {
347 r.forward();
348 }
349 r.forward_tag(b"\n")
350 .or_else(|| r.forward_tag(b"\r\n"))
351 .or_else(|| r.forward_tag(b"\r"))?;
352 let data = r.read_bytes(length as usize)?;
353 r.skip_white_spaces();
354 r.forward_tag(b"endstream")?;
355
356 Some(Stream::new(data, dict.clone(), stream_byte_limit))
357}
358
359fn parse_fallback<'a>(
360 r: &mut Reader<'a>,
361 dict: &Dict<'a>,
362 stream_byte_limit: u64,
363) -> Option<Stream<'a>> {
364 while r.forward_tag(b"stream").is_none() {
365 r.read_byte()?;
366 }
367
368 while r.peek_byte().is_some_and(|b| b == b' ' || b == b'\t') {
371 r.forward();
372 }
373 r.forward_tag(b"\n")
374 .or_else(|| r.forward_tag(b"\r\n"))
375 .or_else(|| r.forward_tag(b"\r"))?;
377
378 let data_start = r.tail()?;
379 let start = r.offset();
380
381 loop {
382 if r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some() {
383 let length = r.offset() - start;
384 let data = data_start.get(..length)?;
385
386 r.skip_white_spaces();
387
388 if r.forward_tag(b"endstream").is_none() {
391 continue;
392 }
393
394 let stream = Stream::new(data, dict.clone(), stream_byte_limit);
395
396 return Some(stream);
398 } else {
399 r.read_byte()?;
400 }
401 }
402}
403
404impl<'a> TryFrom<Object<'a>> for Stream<'a> {
405 type Error = ();
406
407 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
408 match value {
409 Object::Stream(s) => Ok(s),
410 _ => Err(()),
411 }
412 }
413}
414
// Opts `Stream` into `ObjectLike`. The impl body is empty, so nothing is customized
// here.
impl<'a> ObjectLike<'a> for Stream<'a> {}
416
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::pdf::PdfLoadLimits;
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    use super::DecodeFailure;

    /// An unfiltered stream whose body is the ten bytes `abcdefghij`.
    const TEN_BYTE_STREAM: &[u8] = b"<< /Length 10 >> stream\nabcdefghij\nendstream";

    /// Parse `input` as a stream object, panicking if that fails.
    fn read_stream<'a>(input: &'a [u8], ctx: &ReaderContext<'a>) -> Stream<'a> {
        Reader::new(input)
            .read_with_context::<Stream<'_>>(ctx)
            .unwrap()
    }

    /// The raw data of a well-formed stream is exactly the bytes between the keywords.
    #[test]
    fn stream() {
        let parsed = read_stream(TEN_BYTE_STREAM, &ReaderContext::dummy());

        assert_eq!(parsed.0.data, b"abcdefghij");
    }

    /// Without a configured limit, an unfiltered stream decodes to its raw bytes.
    #[test]
    fn decoded_no_limit() {
        let parsed = read_stream(
            b"<< /Length 5 >> stream\nhello\nendstream",
            &ReaderContext::dummy(),
        );

        assert_eq!(parsed.decoded().unwrap(), b"hello");
    }

    /// Decoding fails with `StreamTooLarge` once the output is larger than the limit.
    #[test]
    fn decoded_exceeds_byte_limit() {
        let ctx = ReaderContext::dummy_with_limits(PdfLoadLimits::new().max_stream_bytes(5));
        let parsed = read_stream(TEN_BYTE_STREAM, &ctx);

        match parsed.decoded() {
            Err(DecodeFailure::StreamTooLarge { observed, limit }) => {
                assert_eq!(observed, 10);
                assert_eq!(limit, 5);
            }
            other => panic!("expected StreamTooLarge, got {other:?}"),
        }
    }

    /// An output of exactly the limit is still accepted.
    #[test]
    fn decoded_at_byte_limit_succeeds() {
        let ctx = ReaderContext::dummy_with_limits(PdfLoadLimits::new().max_stream_bytes(10));
        let parsed = read_stream(TEN_BYTE_STREAM, &ctx);

        assert_eq!(parsed.decoded().unwrap(), b"abcdefghij");
    }
}