hayro_syntax/object/
stream.rs

1//! Streams.
2
3use crate::crypto::DecryptionTarget;
4use crate::filter::Filter;
5use crate::object;
6use crate::object::Dict;
7use crate::object::Name;
8use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
9use crate::object::{Array, ObjectIdentifier};
10use crate::object::{Object, ObjectLike};
11use crate::reader::{Readable, Reader, ReaderContext, Skippable};
12use crate::util::OptionLog;
13use log::{info, warn};
14use std::borrow::Cow;
15use std::fmt::{Debug, Formatter};
16
17/// A stream of arbitrary data.
18#[derive(Clone, PartialEq)]
19pub struct Stream<'a> {
20    dict: Dict<'a>,
21    data: &'a [u8],
22}
23
/// Additional parameters for decoding images.
///
/// The `Default` value describes a non-indexed image with an unknown number
/// of bits per component.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ImageDecodeParams {
    /// Whether the color space of the image is an indexed color space.
    pub is_indexed: bool,
    /// The bits per component of the image, if that information is available.
    pub bpc: Option<u8>,
}
32
33impl<'a> Stream<'a> {
34    /// Return the raw, decrypted data of the stream.
35    ///
36    /// Stream filters will not be applied.
37    pub fn raw_data(&self) -> Cow<'a, [u8]> {
38        let ctx = self.dict.ctx();
39
40        if ctx.xref.needs_decryption(ctx)
41            && self
42                .dict
43                .get::<object::String>(TYPE)
44                .map(|t| t.get().as_ref() != b"XRef")
45                .unwrap_or(true)
46        {
47            Cow::Owned(
48                ctx.xref
49                    .decrypt(
50                        self.dict.obj_id().unwrap(),
51                        self.data,
52                        DecryptionTarget::Stream,
53                    )
54                    // TODO: MAybe an error would be better?
55                    .unwrap_or_default(),
56            )
57        } else {
58            Cow::Borrowed(self.data)
59        }
60    }
61
62    /// Return the raw, underlying dictionary of the stream.
63    pub fn dict(&self) -> &Dict<'a> {
64        &self.dict
65    }
66
67    /// Return the object identifier of the stream.
68    pub fn obj_id(&self) -> ObjectIdentifier {
69        self.dict.obj_id().unwrap()
70    }
71
72    /// Return the decoded data of the stream.
73    ///
74    /// Note that the result of this method will not be cached, so calling it multiple
75    /// times is expensive.
76    pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
77        self.decoded_image(&ImageDecodeParams::default())
78            .map(|r| r.data)
79    }
80
81    /// Return the decoded data of the stream, and return image metadata
82    /// if available.
83    pub fn decoded_image(
84        &self,
85        image_params: &ImageDecodeParams,
86    ) -> Result<FilterResult, DecodeFailure> {
87        let data = self.raw_data();
88
89        if let Some(filter) = self
90            .dict
91            .get::<Name>(F)
92            .or_else(|| self.dict.get::<Name>(FILTER))
93            .and_then(|n| Filter::from_name(n))
94        {
95            let params = self
96                .dict
97                .get::<Dict>(DP)
98                .or_else(|| self.dict.get::<Dict>(DECODE_PARMS));
99
100            filter.apply(&data, params.clone().unwrap_or_default(), image_params)
101        } else if let Some(filters) = self
102            .dict
103            .get::<Array>(F)
104            .or_else(|| self.dict.get::<Array>(FILTER))
105        {
106            let filters = filters
107                .iter::<Name>()
108                .map(|n| Filter::from_name(n))
109                .collect::<Option<Vec<_>>>()
110                .ok_or(DecodeFailure::Unknown)?;
111            let params: Vec<_> = self
112                .dict
113                .get::<Array>(DP)
114                .or_else(|| self.dict.get::<Array>(DECODE_PARMS))
115                .map(|a| a.iter::<Object>().collect())
116                .unwrap_or_default();
117
118            let mut current: Option<FilterResult> = None;
119
120            for (i, filter) in filters.iter().enumerate() {
121                let params = params.get(i).and_then(|p| p.clone().cast::<Dict>());
122
123                let new = filter.apply(
124                    current.as_ref().map(|c| c.data.as_ref()).unwrap_or(&data),
125                    params.clone().unwrap_or_default(),
126                    image_params,
127                )?;
128                current = Some(new);
129            }
130
131            Ok(current.unwrap_or(FilterResult {
132                data: data.to_vec(),
133                image_data: None,
134            }))
135        } else {
136            Ok(FilterResult {
137                data: data.to_vec(),
138                image_data: None,
139            })
140        }
141    }
142
143    pub(crate) fn from_raw(data: &'a [u8], dict: Dict<'a>) -> Self {
144        Self { dict, data }
145    }
146}
147
148impl Debug for Stream<'_> {
149    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
150        write!(f, "Stream (len: {:?})", self.data.len())
151    }
152}
153
154impl Skippable for Stream<'_> {
155    fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
156        // A stream can never appear in a dict/array, so it should never be skipped.
157        warn!("attempted to skip a stream object");
158
159        None
160    }
161}
162
163impl<'a> Readable<'a> for Stream<'a> {
164    fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
165        let dict = r.read_with_context::<Dict>(ctx)?;
166
167        if dict.contains_key(F) {
168            warn!("encountered stream referencing external file, which is unsupported");
169
170            return None;
171        }
172
173        let offset = r.offset();
174        parse_proper(r, &dict)
175            .or_else(|| {
176                warn!("failed to parse stream, trying to parse it manually");
177
178                r.jump(offset);
179                parse_fallback(r, &dict)
180            })
181            .error_none("was unable to manually parse the stream")
182    }
183}
184
/// A failure that can occur during decoding a data stream.
///
/// Implements [`std::error::Error`], so it can be boxed or propagated with `?`
/// alongside other error types.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DecodeFailure {
    /// An image stream failed to decode.
    ImageDecode,
    /// A data stream failed to decode.
    StreamDecode,
    /// A JPEG2000 image was encountered, while the `jpeg2000` feature was disabled.
    JpxImage,
    /// A failure occurred while decrypting a file.
    Decryption,
    /// An unknown failure occurred.
    Unknown,
}

impl std::fmt::Display for DecodeFailure {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::ImageDecode => "an image stream failed to decode",
            Self::StreamDecode => "a data stream failed to decode",
            Self::JpxImage => {
                "a JPEG2000 image was encountered, but the `jpeg2000` feature is disabled"
            }
            Self::Decryption => "a failure occurred while decrypting the file",
            Self::Unknown => "an unknown failure occurred",
        };

        f.write_str(message)
    }
}

impl std::error::Error for DecodeFailure {}
199
/// An image color space.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ImageColorSpace {
    /// Grayscale color space.
    Gray,
    /// RGB color space.
    Rgb,
    /// CMYK color space.
    Cmyk,
}

/// Additional data that is extracted from some image streams.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageData {
    /// An optional alpha channel of the image.
    pub alpha: Option<Vec<u8>>,
    /// The color space of the image.
    pub color_space: ImageColorSpace,
    /// The bits per component of the image.
    pub bits_per_component: u8,
}

/// The result of applying a filter.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilterResult {
    /// The decoded data.
    pub data: Vec<u8>,
    /// Additional data that is extracted from JPX image streams.
    pub image_data: Option<ImageData>,
}

impl FilterResult {
    /// Wrap plain decoded bytes in a result that carries no image metadata.
    pub(crate) fn from_data(data: Vec<u8>) -> Self {
        Self {
            data,
            image_data: None,
        }
    }
}
237
238fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
239    let length = dict.get::<u32>(LENGTH)?;
240
241    r.skip_white_spaces_and_comments();
242    r.forward_tag(b"stream")?;
243    r.forward_tag(b"\n")
244        .or_else(|| r.forward_tag(b"\r\n"))
245        .or_else(|| r.forward_tag(b"\r"))?;
246    let data = r.read_bytes(length as usize)?;
247    r.skip_white_spaces();
248    r.forward_tag(b"endstream")?;
249
250    Some(Stream {
251        data,
252        dict: dict.clone(),
253    })
254}
255
256fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
257    while r.forward_tag(b"stream").is_none() {
258        r.read_byte()?;
259    }
260
261    r.forward_tag(b"\n")
262        .or_else(|| r.forward_tag(b"\r\n"))
263        // Technically not allowed, but no reason to not try it.
264        .or_else(|| r.forward_tag(b"\r"))?;
265
266    let data_start = r.tail()?;
267    let start = r.offset();
268
269    loop {
270        if r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some() {
271            let length = r.offset() - start;
272            let data = data_start.get(..length)?;
273
274            r.skip_white_spaces();
275
276            // This was just a whitespace in the data stream but not actually marking the end
277            // of the stream, so continue searching.
278            if r.forward_tag(b"endstream").is_none() {
279                continue;
280            }
281
282            let stream = Stream {
283                data,
284                dict: dict.clone(),
285            };
286
287            // Try decoding the stream to see if it is valid.
288            if stream.decoded().is_ok() {
289                info!("managed to reconstruct the stream");
290
291                // Seems like we found the end!
292                return Some(stream);
293            }
294        } else {
295            r.read_byte()?;
296        }
297    }
298}
299
300impl<'a> TryFrom<Object<'a>> for Stream<'a> {
301    type Error = ();
302
303    fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
304        match value {
305            Object::Stream(s) => Ok(s),
306            _ => Err(()),
307        }
308    }
309}
310
311impl<'a> ObjectLike<'a> for Stream<'a> {}
312
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::{Reader, ReaderContext};

    /// A stream whose `Length` matches its data is read back verbatim.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let ctx = ReaderContext::dummy();
        let mut reader = Reader::new(input);

        let parsed = reader.read_with_context::<Stream>(&ctx);

        assert_eq!(parsed.unwrap().data, b"abcdefghij");
    }
}