1use crate::crypto::DecryptionTarget;
4use crate::filter::Filter;
5use crate::object;
6use crate::object::Dict;
7use crate::object::Name;
8use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
9use crate::object::{Array, ObjectIdentifier};
10use crate::object::{Object, ObjectLike, ObjectRefLike};
11use crate::reader::Reader;
12use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
13use crate::trivia::is_white_space_character;
14use crate::util::{OptionLog, find_needle};
15use alloc::borrow::Cow;
16use alloc::vec::Vec;
17use core::fmt::{Debug, Formatter};
18use smallvec::SmallVec;
19
/// The filters of a stream together with their decode parameters, as collected by
/// `Stream::filters_and_params`.
///
/// Both lists are filled in lockstep, so `params[i]` belongs to `filters[i]`.
struct FiltersAndParams<'a> {
    /// The recognized filters, in the order in which they were collected.
    filters: SmallVec<[Filter; 2]>,
    /// One parameter dictionary per filter; a default dictionary is stored when the stream
    /// dictionary does not provide one.
    params: SmallVec<[Dict<'a>; 2]>,
}
24
/// A stream object: a dictionary together with the bytes of the stream body.
#[derive(Clone)]
pub struct Stream<'a> {
    /// The stream dictionary.
    dict: Dict<'a>,
    /// The bytes of the stream body exactly as they were read from the input, i.e. before any
    /// decryption or filter has been applied.
    data: &'a [u8],
}
31
32impl PartialEq for Stream<'_> {
33 fn eq(&self, other: &Self) -> bool {
34 self.dict == other.dict && self.data == other.data
35 }
36}
37
/// Image-related hints that are handed to every filter by `Stream::decoded_image`.
///
/// `Stream::decoded` passes the `Default` value of this type.
///
/// NOTE(review): the field descriptions below are inferred from the field names only; how the
/// filters interpret them is not visible in this file and should be confirmed there.
#[derive(Clone, PartialEq, Default)]
pub struct ImageDecodeParams {
    /// Presumably whether the image uses an indexed color space.
    pub is_indexed: bool,
    /// Presumably the number of bits per color component, if known.
    pub bpc: Option<u8>,
    /// Presumably the number of color components, if known.
    pub num_components: Option<u8>,
    /// Presumably a desired output size as a pair of `u32` values, if any — TODO confirm the
    /// order of the two values.
    pub target_dimension: Option<(u32, u32)>,
    /// Presumably the width of the image.
    pub width: u32,
    /// Presumably the height of the image.
    pub height: u32,
}
56
57impl<'a> Stream<'a> {
58 pub(crate) fn new(data: &'a [u8], dict: Dict<'a>) -> Self {
59 Self { dict, data }
60 }
61
62 fn filters_and_params(&self) -> FiltersAndParams<'a> {
63 let mut collected_filters = SmallVec::new();
64 let mut collected_params = SmallVec::new();
65
66 if let Some(filter) = self
67 .dict
68 .get::<Name<'_>>(F)
69 .or_else(|| self.dict.get::<Name<'_>>(FILTER))
70 .and_then(Filter::from_name)
71 {
72 let params = self
73 .dict
74 .get::<Dict<'_>>(DP)
75 .or_else(|| self.dict.get::<Dict<'_>>(DECODE_PARMS))
76 .unwrap_or_default();
77
78 collected_filters.push(filter);
79 collected_params.push(params);
80 } else if let Some(filters) = self
81 .dict
82 .get::<Array<'_>>(F)
83 .or_else(|| self.dict.get::<Array<'_>>(FILTER))
84 {
85 let filters = filters.iter::<Name<'_>>().map(Filter::from_name);
86 let mut params = self
87 .dict
88 .get::<Array<'_>>(DP)
89 .or_else(|| self.dict.get::<Array<'_>>(DECODE_PARMS))
90 .map(|a| a.iter::<Object<'_>>());
91
92 for filter in filters {
93 let params = params
94 .as_mut()
95 .and_then(|p| p.next())
96 .and_then(|p| p.into_dict())
97 .unwrap_or_default();
98
99 if let Some(filter) = filter {
100 collected_filters.push(filter);
101 collected_params.push(params);
102 }
103 }
104 }
105
106 FiltersAndParams {
107 filters: collected_filters,
108 params: collected_params,
109 }
110 }
111
112 pub fn raw_data(&self) -> Cow<'a, [u8]> {
116 let ctx = self.dict.ctx();
117
118 if ctx.xref().needs_decryption(ctx)
119 && self
120 .dict
121 .get::<object::String<'_>>(TYPE)
122 .map(|t| t.as_ref() != b"XRef")
123 .unwrap_or(true)
124 {
125 Cow::Owned(
126 ctx.xref()
127 .decrypt(
128 self.dict.obj_id().unwrap(),
129 self.data,
130 DecryptionTarget::Stream,
131 )
132 .unwrap_or_default(),
134 )
135 } else {
136 Cow::Borrowed(self.data)
137 }
138 }
139
140 pub fn dict(&self) -> &Dict<'a> {
142 &self.dict
143 }
144
145 pub fn obj_id(&self) -> ObjectIdentifier {
147 self.dict.obj_id().unwrap()
148 }
149
150 pub fn filters(&self) -> SmallVec<[Filter; 2]> {
152 self.filters_and_params().filters
153 }
154
155 pub fn decoded(&self) -> Result<Cow<'a, [u8]>, DecodeFailure> {
160 self.decoded_image(&ImageDecodeParams::default())
161 .map(|r| r.data)
162 }
163
164 pub fn decoded_image(
167 &self,
168 image_params: &ImageDecodeParams,
169 ) -> Result<FilterResult<'a>, DecodeFailure> {
170 let data = self.raw_data();
171 let filters_and_params = self.filters_and_params();
172
173 let mut current: Option<FilterResult<'a>> = None;
174
175 for (filter, params) in filters_and_params
176 .filters
177 .iter()
178 .zip(filters_and_params.params.iter())
179 {
180 let new = filter.apply(
181 current.as_ref().map(|c| c.data.as_ref()).unwrap_or(&data),
182 params,
183 image_params,
184 )?;
185 current = Some(new);
186 }
187
188 Ok(current.unwrap_or(FilterResult {
189 data,
190 image_data: None,
191 }))
192 }
193}
194
195impl Debug for Stream<'_> {
196 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
197 write!(f, "Stream (len: {:?})", self.data.len())
198 }
199}
200
201impl Skippable for Stream<'_> {
202 fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
203 warn!("attempted to skip a stream object");
205
206 None
207 }
208}
209
210impl<'a> Readable<'a> for Stream<'a> {
211 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
212 let dict = r.read_with_context::<Dict<'_>>(ctx)?;
213
214 if dict.contains_key(F) {
215 warn!("encountered stream referencing external file, which is unsupported");
216
217 return None;
218 }
219
220 let offset = r.offset();
221 parse_proper(r, &dict)
222 .or_else(|| {
223 warn!("failed to parse stream, trying to parse it manually");
224
225 r.jump(offset);
226 parse_fallback(r, &dict)
227 })
228 .error_none("was unable to manually parse the stream")
229 }
230}
231
/// The reasons for which decoding a stream can fail, as returned by `Stream::decoded` and
/// `Stream::decoded_image`.
///
/// NOTE(review): the variants are produced outside of this file; the descriptions below are
/// inferred from the variant names and should be confirmed against the filter implementations.
#[derive(Debug, Copy, Clone)]
pub enum DecodeFailure {
    /// Presumably: an image stream could not be decoded.
    ImageDecode,
    /// Presumably: a (non-image) data stream could not be decoded.
    StreamDecode,
    /// Presumably: the data could not be decrypted.
    Decryption,
    /// Presumably: a failure without a more specific cause.
    Unknown,
}
244
/// The color space reported for a decoded image (see `ImageData::color_space`).
///
/// NOTE(review): the variant descriptions are inferred from the variant names.
#[derive(Debug, Copy, Clone)]
pub enum ImageColorSpace {
    /// Presumably a grayscale color space.
    Gray,
    /// Presumably an RGB color space.
    Rgb,
    /// Presumably a CMYK color space.
    Cmyk,
    /// A color space that is none of the above, carrying a `u8` payload — presumably a
    /// component count or format-specific identifier; TODO confirm where it is produced.
    Unknown(u8),
}
257
/// Additional image information that can accompany decoded stream data (see
/// `FilterResult::image_data`).
///
/// NOTE(review): the field descriptions are inferred from the field names and types; the code
/// that fills them in is outside of this file.
pub struct ImageData {
    /// Presumably separately stored alpha (transparency) data, if any.
    pub alpha: Option<Vec<u8>>,
    /// The color space of the image, if one is reported.
    pub color_space: Option<ImageColorSpace>,
    /// Presumably the number of bits per color component.
    pub bits_per_component: u8,
    /// Presumably the width of the image.
    pub width: u32,
    /// Presumably the height of the image.
    pub height: u32,
}
271
/// The outcome of decoding a stream, as returned by `Stream::decoded_image`.
pub struct FilterResult<'a> {
    /// The decoded bytes. When no filter was applied, this is the raw stream data.
    pub data: Cow<'a, [u8]>,
    /// Optional image information accompanying the data; `None` when no filter was applied or
    /// when the result was built with `FilterResult::from_data`.
    pub image_data: Option<ImageData>,
}
279
280impl FilterResult<'_> {
281 pub(crate) fn from_data(data: Vec<u8>) -> Self {
282 Self {
283 data: Cow::Owned(data),
284 image_data: None,
285 }
286 }
287}
288
289fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
290 let length = dict.get::<u32>(LENGTH)?;
291
292 r.skip_white_spaces_and_comments();
293 r.forward_tag(b"stream")?;
294 r.forward_tag(b"\n")
295 .or_else(|| r.forward_tag(b"\r\n"))
296 .or_else(|| r.forward_tag(b"\r"))?;
297 let data = r.read_bytes(length as usize)?;
298 r.skip_white_spaces();
299 r.forward_tag(b"endstream")?;
300
301 Some(Stream::new(data, dict.clone()))
302}
303
304fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
305 let stream_offset = find_needle(r.tail()?, b"stream")?;
306 r.read_bytes(stream_offset)?;
307 r.forward_tag(b"stream")?;
308
309 r.forward_tag(b"\n")
310 .or_else(|| r.forward_tag(b"\r\n"))
311 .or_else(|| r.forward_tag(b"\r"))?;
313
314 let tail = r.tail()?;
315 let endstream_offset = find_needle(tail, b"endstream")?;
316 let data_end = trim_trailing_ascii_whitespace(&tail[..endstream_offset]);
317 let data = tail.get(..data_end)?;
318
319 r.read_bytes(endstream_offset)?;
320 r.skip_white_spaces();
321 r.forward_tag(b"endstream")?;
322
323 Some(Stream::new(data, dict.clone()))
324}
325
326fn trim_trailing_ascii_whitespace(data: &[u8]) -> usize {
327 let mut end = data.len();
328
329 while data
330 .get(end.wrapping_sub(1))
331 .copied()
332 .is_some_and(is_white_space_character)
333 {
334 end -= 1;
335 }
336
337 end
338}
339
340impl<'a> TryFrom<Object<'a>> for Stream<'a> {
341 type Error = ();
342
343 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
344 match value {
345 Object::Stream(s) => Ok(s),
346 _ => Err(()),
347 }
348 }
349}
350
/// `Stream` implements `ObjectLike` with an empty impl body, i.e. it relies entirely on what
/// the trait itself provides.
impl<'a> ObjectLike<'a> for Stream<'a> {}
352impl<'a> ObjectRefLike<'a> for Stream<'a> {
353 fn cast_ref<'b>(obj: &'b Object<'a>) -> Option<&'b Self> {
354 match obj {
355 Object::Stream(stream) => Some(stream),
356 _ => None,
357 }
358 }
359}
360
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    /// Parse `input` as a stream using a dummy reader context, panicking if parsing fails.
    fn read_stream(input: &'static [u8]) -> Stream<'static> {
        let mut reader = Reader::new(input);

        reader
            .read_with_context::<Stream<'_>>(&ReaderContext::dummy())
            .unwrap()
    }

    /// The declared length matches the body, so the regular parsing path succeeds.
    #[test]
    fn stream() {
        let parsed = read_stream(b"<< /Length 10 >> stream\nabcdefghij\nendstream");

        assert_eq!(parsed.data, b"abcdefghij");
    }

    /// The declared length is too large, so the body has to be found via the fallback path.
    #[test]
    fn stream_fallback() {
        let parsed = read_stream(b"<< /Length 999 >> stream\nabcdefghij\nendstream");

        assert_eq!(parsed.data, b"abcdefghij");
    }
}