Skip to main content

hayro_syntax/object/
stream.rs

1//! Streams.
2
3use crate::crypto::DecryptionTarget;
4use crate::filter::Filter;
5use crate::object;
6use crate::object::Dict;
7use crate::object::Name;
8use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
9use crate::object::{Array, ObjectIdentifier};
10use crate::object::{Object, ObjectLike, ObjectRefLike};
11use crate::reader::Reader;
12use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
13use crate::trivia::is_white_space_character;
14use crate::util::{OptionLog, find_needle};
15use alloc::borrow::Cow;
16use alloc::vec::Vec;
17use core::fmt::{Debug, Formatter};
18use smallvec::SmallVec;
19
/// The filters of a stream together with their decode parameters.
///
/// Both vectors are filled in lockstep, so the entry at index `i` of `params`
/// belongs to the filter at index `i` of `filters`.
struct FiltersAndParams<'a> {
    /// The recognized filters, in the order in which they are listed.
    filters: SmallVec<[Filter; 2]>,
    /// The decode parameters of each filter (an empty dictionary if absent).
    params: SmallVec<[Dict<'a>; 2]>,
}
24
25/// A stream of arbitrary data.
26#[derive(Clone)]
27pub struct Stream<'a> {
28    dict: Dict<'a>,
29    data: &'a [u8],
30}
31
32impl PartialEq for Stream<'_> {
33    fn eq(&self, other: &Self) -> bool {
34        self.dict == other.dict && self.data == other.data
35    }
36}
37
/// Additional parameters for decoding images.
///
/// The [`Default`] value carries no information: all optional fields are
/// `None`, `is_indexed` is `false` and both dimensions are `0`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ImageDecodeParams {
    /// Whether the color space of the image is an indexed color space.
    pub is_indexed: bool,
    /// The bits per component of the image, if that information is available.
    pub bpc: Option<u8>,
    /// The number of color components of the image, if that information is available.
    pub num_components: Option<u8>,
    /// A target resolution for the image. Note that this is only a hint so that
    /// in case it's possible, a version of the image will be extracted that
    /// is as close as possible to the hinted dimension.
    pub target_dimension: Option<(u32, u32)>,
    /// The width of the image as indicated by the image dictionary.
    pub width: u32,
    /// The height of the image as indicated by the image dictionary.
    pub height: u32,
}
56
57impl<'a> Stream<'a> {
58    pub(crate) fn new(data: &'a [u8], dict: Dict<'a>) -> Self {
59        Self { dict, data }
60    }
61
62    fn filters_and_params(&self) -> FiltersAndParams<'a> {
63        let mut collected_filters = SmallVec::new();
64        let mut collected_params = SmallVec::new();
65
66        if let Some(filter) = self
67            .dict
68            .get::<Name<'_>>(F)
69            .or_else(|| self.dict.get::<Name<'_>>(FILTER))
70            .and_then(Filter::from_name)
71        {
72            let params = self
73                .dict
74                .get::<Dict<'_>>(DP)
75                .or_else(|| self.dict.get::<Dict<'_>>(DECODE_PARMS))
76                .unwrap_or_default();
77
78            collected_filters.push(filter);
79            collected_params.push(params);
80        } else if let Some(filters) = self
81            .dict
82            .get::<Array<'_>>(F)
83            .or_else(|| self.dict.get::<Array<'_>>(FILTER))
84        {
85            let filters = filters.iter::<Name<'_>>().map(Filter::from_name);
86            let mut params = self
87                .dict
88                .get::<Array<'_>>(DP)
89                .or_else(|| self.dict.get::<Array<'_>>(DECODE_PARMS))
90                .map(|a| a.iter::<Object<'_>>());
91
92            for filter in filters {
93                let params = params
94                    .as_mut()
95                    .and_then(|p| p.next())
96                    .and_then(|p| p.into_dict())
97                    .unwrap_or_default();
98
99                if let Some(filter) = filter {
100                    collected_filters.push(filter);
101                    collected_params.push(params);
102                }
103            }
104        }
105
106        FiltersAndParams {
107            filters: collected_filters,
108            params: collected_params,
109        }
110    }
111
112    /// Return the raw, decrypted data of the stream.
113    ///
114    /// Stream filters will not be applied.
115    pub fn raw_data(&self) -> Cow<'a, [u8]> {
116        let ctx = self.dict.ctx();
117
118        if ctx.xref().needs_decryption(ctx)
119            && self
120                .dict
121                .get::<object::String<'_>>(TYPE)
122                .map(|t| t.as_ref() != b"XRef")
123                .unwrap_or(true)
124        {
125            Cow::Owned(
126                ctx.xref()
127                    .decrypt(
128                        self.dict.obj_id().unwrap(),
129                        self.data,
130                        DecryptionTarget::Stream,
131                    )
132                    // TODO: MAybe an error would be better?
133                    .unwrap_or_default(),
134            )
135        } else {
136            Cow::Borrowed(self.data)
137        }
138    }
139
    /// Return the raw, underlying dictionary of the stream.
    ///
    /// The dictionary is borrowed; no copy is made.
    pub fn dict(&self) -> &Dict<'a> {
        &self.dict
    }
144
    /// Return the object identifier of the stream.
    ///
    /// # Panics
    ///
    /// Panics if the stream dictionary has no object identifier.
    pub fn obj_id(&self) -> ObjectIdentifier {
        self.dict.obj_id().unwrap()
    }
149
150    /// Return the filters that are applied to the stream.
151    pub fn filters(&self) -> SmallVec<[Filter; 2]> {
152        self.filters_and_params().filters
153    }
154
155    /// Return the decoded data of the stream.
156    ///
157    /// Note that the result of this method will not be cached, so calling it multiple
158    /// times is expensive.
159    pub fn decoded(&self) -> Result<Cow<'a, [u8]>, DecodeFailure> {
160        self.decoded_image(&ImageDecodeParams::default())
161            .map(|r| r.data)
162    }
163
164    /// Return the decoded data of the stream, and return image metadata
165    /// if available.
166    pub fn decoded_image(
167        &self,
168        image_params: &ImageDecodeParams,
169    ) -> Result<FilterResult<'a>, DecodeFailure> {
170        let data = self.raw_data();
171        let filters_and_params = self.filters_and_params();
172
173        let mut current: Option<FilterResult<'a>> = None;
174
175        for (filter, params) in filters_and_params
176            .filters
177            .iter()
178            .zip(filters_and_params.params.iter())
179        {
180            let new = filter.apply(
181                current.as_ref().map(|c| c.data.as_ref()).unwrap_or(&data),
182                params,
183                image_params,
184            )?;
185            current = Some(new);
186        }
187
188        Ok(current.unwrap_or(FilterResult {
189            data,
190            image_data: None,
191        }))
192    }
193}
194
195impl Debug for Stream<'_> {
196    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
197        write!(f, "Stream (len: {:?})", self.data.len())
198    }
199}
200
impl Skippable for Stream<'_> {
    /// Skipping a stream is not supported: this always logs a warning and
    /// returns `None` without touching the reader.
    fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
        // A stream can never appear in a dict/array, so it should never be skipped.
        warn!("attempted to skip a stream object");

        None
    }
}
209
210impl<'a> Readable<'a> for Stream<'a> {
211    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
212        let dict = r.read_with_context::<Dict<'_>>(ctx)?;
213
214        if dict.contains_key(F) {
215            warn!("encountered stream referencing external file, which is unsupported");
216
217            return None;
218        }
219
220        let offset = r.offset();
221        parse_proper(r, &dict)
222            .or_else(|| {
223                warn!("failed to parse stream, trying to parse it manually");
224
225                r.jump(offset);
226                parse_fallback(r, &dict)
227            })
228            .error_none("was unable to manually parse the stream")
229    }
230}
231
/// A failure that can occur during decoding a data stream.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DecodeFailure {
    /// An image stream failed to decode.
    ImageDecode,
    /// A data stream failed to decode.
    StreamDecode,
    /// A failure occurred while decrypting a file.
    Decryption,
    /// An unknown failure occurred.
    Unknown,
}
244
/// An image color space.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ImageColorSpace {
    /// Grayscale color space.
    Gray,
    /// RGB color space.
    Rgb,
    /// CMYK color space.
    Cmyk,
    /// An unknown color space.
    Unknown(u8),
}
257
258/// Additional data that is extracted from some image streams.
259pub struct ImageData {
260    /// An optional alpha channel of the image.
261    pub alpha: Option<Vec<u8>>,
262    /// The color space of the image.
263    pub color_space: Option<ImageColorSpace>,
264    /// The bits per component of the image.
265    pub bits_per_component: u8,
266    /// The width of the image.
267    pub width: u32,
268    /// The height of the image.
269    pub height: u32,
270}
271
/// The result of applying a filter.
pub struct FilterResult<'a> {
    /// The decoded data.
    pub data: Cow<'a, [u8]>,
    /// Additional data that is extracted from JPX image streams.
    /// `None` if no such data is available.
    pub image_data: Option<ImageData>,
}
279
280impl FilterResult<'_> {
281    pub(crate) fn from_data(data: Vec<u8>) -> Self {
282        Self {
283            data: Cow::Owned(data),
284            image_data: None,
285        }
286    }
287}
288
289fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
290    let length = dict.get::<u32>(LENGTH)?;
291
292    r.skip_white_spaces_and_comments();
293    r.forward_tag(b"stream")?;
294    r.forward_tag(b"\n")
295        .or_else(|| r.forward_tag(b"\r\n"))
296        .or_else(|| r.forward_tag(b"\r"))?;
297    let data = r.read_bytes(length as usize)?;
298    r.skip_white_spaces();
299    r.forward_tag(b"endstream")?;
300
301    Some(Stream::new(data, dict.clone()))
302}
303
304fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
305    let stream_offset = find_needle(r.tail()?, b"stream")?;
306    r.read_bytes(stream_offset)?;
307    r.forward_tag(b"stream")?;
308
309    r.forward_tag(b"\n")
310        .or_else(|| r.forward_tag(b"\r\n"))
311        // Technically not allowed, but no reason to not try it.
312        .or_else(|| r.forward_tag(b"\r"))?;
313
314    let tail = r.tail()?;
315    let endstream_offset = find_needle(tail, b"endstream")?;
316    let data_end = trim_trailing_ascii_whitespace(&tail[..endstream_offset]);
317    let data = tail.get(..data_end)?;
318
319    r.read_bytes(endstream_offset)?;
320    r.skip_white_spaces();
321    r.forward_tag(b"endstream")?;
322
323    Some(Stream::new(data, dict.clone()))
324}
325
326fn trim_trailing_ascii_whitespace(data: &[u8]) -> usize {
327    let mut end = data.len();
328
329    while data
330        .get(end.wrapping_sub(1))
331        .copied()
332        .is_some_and(is_white_space_character)
333    {
334        end -= 1;
335    }
336
337    end
338}
339
340impl<'a> TryFrom<Object<'a>> for Stream<'a> {
341    type Error = ();
342
343    fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
344        match value {
345            Object::Stream(s) => Ok(s),
346            _ => Err(()),
347        }
348    }
349}
350
351impl<'a> ObjectLike<'a> for Stream<'a> {}
352impl<'a> ObjectRefLike<'a> for Stream<'a> {
353    fn cast_ref<'b>(obj: &'b Object<'a>) -> Option<&'b Self> {
354        match obj {
355            Object::Stream(stream) => Some(stream),
356            _ => None,
357        }
358    }
359}
360
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    /// A stream whose stated length matches its data.
    #[test]
    fn stream() {
        let data = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let mut r = Reader::new(data);
        let stream = r
            .read_with_context::<Stream<'_>>(&ReaderContext::dummy())
            .unwrap();

        assert_eq!(stream.data, b"abcdefghij");
    }

    /// A stream whose stated length exceeds the available data; the data is
    /// still expected to be recovered.
    #[test]
    fn stream_fallback() {
        let data = b"<< /Length 999 >> stream\nabcdefghij\nendstream";
        let mut r = Reader::new(data);
        let stream = r
            .read_with_context::<Stream<'_>>(&ReaderContext::dummy())
            .unwrap();

        assert_eq!(stream.data, b"abcdefghij");
    }
}