hayro_syntax/object/
stream.rs

1//! Streams.
2
3use crate::crypto::DecryptionTarget;
4use crate::filter::Filter;
5use crate::object;
6use crate::object::Dict;
7use crate::object::Name;
8use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
9use crate::object::{Array, ObjectIdentifier};
10use crate::object::{Object, ObjectLike};
11use crate::reader::Reader;
12use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
13use crate::util::OptionLog;
14use log::warn;
15use smallvec::SmallVec;
16use std::borrow::Cow;
17use std::fmt::{Debug, Formatter};
18
/// A stream of arbitrary data.
///
/// A stream consists of a dictionary plus a slice of bytes borrowed from the
/// underlying file. The filters named in the dictionary are resolved once, when the
/// stream is constructed.
#[derive(Clone)]
pub struct Stream<'a> {
    /// The dictionary that accompanies the stream data.
    dict: Dict<'a>,
    /// The recognized filters of the stream, in the order in which they are applied
    /// when decoding.
    filters: SmallVec<[Filter; 2]>,
    /// The decode parameters of each filter; the entry at index `i` belongs to
    /// `filters[i]`.
    filter_params: SmallVec<[Dict<'a>; 2]>,
    /// The bytes of the stream as stored, i.e. before decryption and before any
    /// filter has been applied.
    data: &'a [u8],
}
27
28impl PartialEq for Stream<'_> {
29    fn eq(&self, other: &Self) -> bool {
30        self.dict == other.dict && self.data == other.data
31    }
32}
33
/// Additional parameters for decoding images.
///
/// The [`Default`] value has no optional information set, `is_indexed` set to `false`
/// and a width and height of zero.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ImageDecodeParams {
    /// Whether the color space of the image is an indexed color space.
    pub is_indexed: bool,
    /// The bits per component of the image, if that information is available.
    pub bpc: Option<u8>,
    /// The number of components of the image, if that information is available.
    pub num_components: Option<u8>,
    /// A target resolution for the image. Note that this is only a hint so that
    /// in case it's possible, a version of the image will be extracted that
    /// is as close as possible to the hinted dimension.
    pub target_dimension: Option<(u32, u32)>,
    /// The width of the image as indicated by the image dictionary.
    pub width: u32,
    /// The height of the image as indicated by the image dictionary.
    pub height: u32,
}
52
53impl<'a> Stream<'a> {
54    pub(crate) fn new(data: &'a [u8], dict: Dict<'a>) -> Self {
55        let mut collected_filters = SmallVec::new();
56        let mut collected_params = SmallVec::new();
57
58        if let Some(filter) = dict
59            .get::<Name<'_>>(F)
60            .or_else(|| dict.get::<Name<'_>>(FILTER))
61            .and_then(|n| Filter::from_name(n))
62        {
63            let params = dict
64                .get::<Dict<'_>>(DP)
65                .or_else(|| dict.get::<Dict<'_>>(DECODE_PARMS))
66                .unwrap_or_default();
67
68            collected_filters.push(filter);
69            collected_params.push(params);
70        } else if let Some(filters) = dict
71            .get::<Array<'_>>(F)
72            .or_else(|| dict.get::<Array<'_>>(FILTER))
73        {
74            let filters = filters.iter::<Name<'_>>().map(|n| Filter::from_name(n));
75            let mut params = dict
76                .get::<Array<'_>>(DP)
77                .or_else(|| dict.get::<Array<'_>>(DECODE_PARMS))
78                .map(|a| a.iter::<Object<'_>>());
79
80            for filter in filters {
81                let params = params
82                    .as_mut()
83                    .and_then(|p| p.next())
84                    .and_then(|p| p.into_dict())
85                    .unwrap_or_default();
86
87                if let Some(filter) = filter {
88                    collected_filters.push(filter);
89                    collected_params.push(params);
90                }
91            }
92        }
93
94        Self {
95            dict,
96            filters: collected_filters,
97            filter_params: collected_params,
98            data,
99        }
100    }
101
102    /// Return the raw, decrypted data of the stream.
103    ///
104    /// Stream filters will not be applied.
105    pub fn raw_data(&self) -> Cow<'a, [u8]> {
106        let ctx = self.dict.ctx();
107
108        if ctx.xref.needs_decryption(ctx)
109            && self
110                .dict
111                .get::<object::String<'_>>(TYPE)
112                .map(|t| t.get().as_ref() != b"XRef")
113                .unwrap_or(true)
114        {
115            Cow::Owned(
116                ctx.xref
117                    .decrypt(
118                        self.dict.obj_id().unwrap(),
119                        self.data,
120                        DecryptionTarget::Stream,
121                    )
122                    // TODO: MAybe an error would be better?
123                    .unwrap_or_default(),
124            )
125        } else {
126            Cow::Borrowed(self.data)
127        }
128    }
129
130    /// Return the raw, underlying dictionary of the stream.
131    pub fn dict(&self) -> &Dict<'a> {
132        &self.dict
133    }
134
135    /// Return the object identifier of the stream.
136    pub fn obj_id(&self) -> ObjectIdentifier {
137        self.dict.obj_id().unwrap()
138    }
139
140    /// Return the filters that are applied to the stream.
141    pub fn filters(&self) -> &[Filter] {
142        &self.filters
143    }
144
145    /// Return the decoded data of the stream.
146    ///
147    /// Note that the result of this method will not be cached, so calling it multiple
148    /// times is expensive.
149    pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
150        self.decoded_image(&ImageDecodeParams::default())
151            .map(|r| r.data)
152    }
153
154    /// Return the decoded data of the stream, and return image metadata
155    /// if available.
156    pub fn decoded_image(
157        &self,
158        image_params: &ImageDecodeParams,
159    ) -> Result<FilterResult, DecodeFailure> {
160        let data = self.raw_data();
161
162        let mut current: Option<FilterResult> = None;
163
164        for (filter, params) in self.filters.iter().zip(self.filter_params.iter()) {
165            let new = filter.apply(
166                current.as_ref().map(|c| c.data.as_ref()).unwrap_or(&data),
167                params.clone(),
168                image_params,
169            )?;
170            current = Some(new);
171        }
172
173        Ok(current.unwrap_or(FilterResult {
174            data: data.to_vec(),
175            image_data: None,
176        }))
177    }
178}
179
180impl Debug for Stream<'_> {
181    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
182        write!(f, "Stream (len: {:?})", self.data.len())
183    }
184}
185
186impl Skippable for Stream<'_> {
187    fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
188        // A stream can never appear in a dict/array, so it should never be skipped.
189        warn!("attempted to skip a stream object");
190
191        None
192    }
193}
194
195impl<'a> Readable<'a> for Stream<'a> {
196    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
197        let dict = r.read_with_context::<Dict<'_>>(ctx)?;
198
199        if dict.contains_key(F) {
200            warn!("encountered stream referencing external file, which is unsupported");
201
202            return None;
203        }
204
205        let offset = r.offset();
206        parse_proper(r, &dict)
207            .or_else(|| {
208                warn!("failed to parse stream, trying to parse it manually");
209
210                r.jump(offset);
211                parse_fallback(r, &dict)
212            })
213            .error_none("was unable to manually parse the stream")
214    }
215}
216
/// A failure that can occur during decoding a data stream.
///
/// The type implements [`std::error::Error`], so it can be propagated with `?` into
/// boxed or otherwise generic error types.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DecodeFailure {
    /// An image stream failed to decode.
    ImageDecode,
    /// A data stream failed to decode.
    StreamDecode,
    /// A failure occurred while decrypting a file.
    Decryption,
    /// An unknown failure occurred.
    Unknown,
}

impl std::fmt::Display for DecodeFailure {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::ImageDecode => "failed to decode an image stream",
            Self::StreamDecode => "failed to decode a data stream",
            Self::Decryption => "failed to decrypt the data",
            Self::Unknown => "an unknown decoding failure occurred",
        };

        f.write_str(message)
    }
}

impl std::error::Error for DecodeFailure {}
229
/// An image color space.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ImageColorSpace {
    /// Grayscale color space.
    Gray,
    /// RGB color space.
    Rgb,
    /// CMYK color space.
    Cmyk,
    /// An unknown color space.
    Unknown(u8),
}
242
243/// Additional data that is extracted from some image streams.
244pub struct ImageData {
245    /// An optional alpha channel of the image.
246    pub alpha: Option<Vec<u8>>,
247    /// The color space of the image.
248    pub color_space: Option<ImageColorSpace>,
249    /// The bits per component of the image.
250    pub bits_per_component: u8,
251    /// The width of the image.
252    pub width: u32,
253    /// The height of the image.
254    pub height: u32,
255}
256
/// The result of applying a filter.
///
/// Besides the decoded bytes, a filter may also report metadata about the image it
/// decoded.
pub struct FilterResult {
    /// The decoded data.
    pub data: Vec<u8>,
    /// Additional data that is extracted from JPX image streams. `None` if no such
    /// data is available.
    pub image_data: Option<ImageData>,
}
264
265impl FilterResult {
266    pub(crate) fn from_data(data: Vec<u8>) -> Self {
267        Self {
268            data,
269            image_data: None,
270        }
271    }
272}
273
274fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
275    let length = dict.get::<u32>(LENGTH)?;
276
277    r.skip_white_spaces_and_comments();
278    r.forward_tag(b"stream")?;
279    r.forward_tag(b"\n")
280        .or_else(|| r.forward_tag(b"\r\n"))
281        .or_else(|| r.forward_tag(b"\r"))?;
282    let data = r.read_bytes(length as usize)?;
283    r.skip_white_spaces();
284    r.forward_tag(b"endstream")?;
285
286    Some(Stream::new(data, dict.clone()))
287}
288
289fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
290    while r.forward_tag(b"stream").is_none() {
291        r.read_byte()?;
292    }
293
294    r.forward_tag(b"\n")
295        .or_else(|| r.forward_tag(b"\r\n"))
296        // Technically not allowed, but no reason to not try it.
297        .or_else(|| r.forward_tag(b"\r"))?;
298
299    let data_start = r.tail()?;
300    let start = r.offset();
301
302    loop {
303        if r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some() {
304            let length = r.offset() - start;
305            let data = data_start.get(..length)?;
306
307            r.skip_white_spaces();
308
309            // This was just a whitespace in the data stream but not actually marking the end
310            // of the stream, so continue searching.
311            if r.forward_tag(b"endstream").is_none() {
312                continue;
313            }
314
315            let stream = Stream::new(data, dict.clone());
316
317            // Seems like we found the end!
318            return Some(stream);
319        } else {
320            r.read_byte()?;
321        }
322    }
323}
324
325impl<'a> TryFrom<Object<'a>> for Stream<'a> {
326    type Error = ();
327
328    fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
329        match value {
330            Object::Stream(s) => Ok(s),
331            _ => Err(()),
332        }
333    }
334}
335
// Marks `Stream` as an `ObjectLike`; no items of the trait are overridden here.
impl<'a> ObjectLike<'a> for Stream<'a> {}
337
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    /// A well-formed stream whose `Length` matches its data is read completely.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let mut reader = Reader::new(input);

        let parsed = reader.read_with_context::<Stream<'_>>(&ReaderContext::dummy());
        let stream = parsed.unwrap();

        assert_eq!(stream.data, b"abcdefghij");
    }
}
354}