hayro_syntax/object/
stream.rs

1//! Streams.
2
3use crate::filter::Filter;
4use crate::object::Array;
5use crate::object::Dict;
6use crate::object::Name;
7use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH};
8use crate::object::{Object, ObjectLike};
9use crate::reader::{Readable, Reader, ReaderContext, Skippable};
10use crate::util::OptionLog;
11use log::{info, warn};
12use std::fmt::{Debug, Formatter};
13
14/// A stream of arbitrary data.
15#[derive(Clone, PartialEq)]
16pub struct Stream<'a> {
17    dict: Dict<'a>,
18    data: &'a [u8],
19}
20
21impl<'a> Stream<'a> {
22    /// Return the raw (potentially filtered) data of the stream.
23    pub fn raw_data(&self) -> &'a [u8] {
24        self.data
25    }
26
27    /// Return the raw, underlying dictionary of the stream.
28    pub fn dict(&self) -> &Dict<'a> {
29        &self.dict
30    }
31
32    /// Return the decoded data of the stream.
33    ///
34    /// Note that the result of this method will not be cached, so calling it multiple
35    /// times is expensive.
36    pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
37        self.decoded_image().map(|r| r.data)
38    }
39
40    /// Return the decoded data of the stream, and return image metadata
41    /// if available.
42    pub fn decoded_image(&self) -> Result<FilterResult, DecodeFailure> {
43        if let Some(filter) = self
44            .dict
45            .get::<Name>(F)
46            .or_else(|| self.dict.get::<Name>(FILTER))
47            .and_then(|n| Filter::from_name(n))
48        {
49            let params = self
50                .dict
51                .get::<Dict>(DP)
52                .or_else(|| self.dict.get::<Dict>(DECODE_PARMS));
53
54            filter.apply(self.data, params.clone().unwrap_or_default())
55        } else if let Some(filters) = self
56            .dict
57            .get::<Array>(F)
58            .or_else(|| self.dict.get::<Array>(FILTER))
59        {
60            let filters = filters
61                .iter::<Name>()
62                .map(|n| Filter::from_name(n))
63                .collect::<Option<Vec<_>>>()
64                .ok_or(DecodeFailure::Unknown)?;
65            let params: Vec<_> = self
66                .dict
67                .get::<Array>(DP)
68                .or_else(|| self.dict.get::<Array>(DECODE_PARMS))
69                .map(|a| a.iter::<Object>().collect())
70                .unwrap_or_default();
71
72            let mut current: Option<FilterResult> = None;
73
74            for (i, filter) in filters.iter().enumerate() {
75                let params = params.get(i).and_then(|p| p.clone().cast::<Dict>());
76
77                let new = filter.apply(
78                    current
79                        .as_ref()
80                        .map(|c| c.data.as_ref())
81                        .unwrap_or(self.data),
82                    params.clone().unwrap_or_default(),
83                )?;
84                current = Some(new);
85            }
86
87            Ok(current.unwrap_or(FilterResult {
88                data: self.data.to_vec(),
89                image_data: None,
90            }))
91        } else {
92            Ok(FilterResult {
93                data: self.data.to_vec(),
94                image_data: None,
95            })
96        }
97    }
98
99    pub(crate) fn from_raw(data: &'a [u8], dict: Dict<'a>) -> Self {
100        Self { dict, data }
101    }
102}
103
104impl Debug for Stream<'_> {
105    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
106        write!(f, "Stream (len: {:?})", self.data.len())
107    }
108}
109
110impl Skippable for Stream<'_> {
111    fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
112        // A stream can never appear in a dict/array, so it should never be skipped.
113        warn!("attempted to skip a stream object");
114
115        None
116    }
117}
118
119impl<'a> Readable<'a> for Stream<'a> {
120    fn read(r: &mut Reader<'a>, ctx: ReaderContext<'a>) -> Option<Self> {
121        let dict = r.read_with_context::<Dict>(ctx)?;
122
123        if dict.contains_key(F) {
124            warn!("encountered stream referencing external file, which is unsupported");
125
126            return None;
127        }
128
129        let offset = r.offset();
130        parse_proper(r, &dict)
131            .or_else(|| {
132                warn!("failed to parse stream, trying to parse it manually");
133
134                r.jump(offset);
135                parse_fallback(r, &dict)
136            })
137            .error_none("was unable to manually parse the stream")
138    }
139}
140
/// The ways in which decoding a data stream can fail.
#[derive(Debug, Copy, Clone)]
pub enum DecodeFailure {
    /// Decoding of an image stream failed.
    ImageDecode,
    /// Decoding of a data stream failed.
    StreamDecode,
    /// The stream contains a JPEG2000 image, but the `jpeg2000` feature is disabled.
    JpxImage,
    /// The failure could not be attributed to a more specific cause.
    Unknown,
}
153
/// An image color space.
#[derive(Debug, Copy, Clone)]
pub enum ImageColorSpace {
    /// Grayscale color space.
    Gray,
    /// RGB color space.
    Rgb,
    /// CMYK color space.
    Cmyk,
}

/// Additional data that is extracted from some image streams.
// `Debug` and `Clone` are derived so that this public type (and
// `FilterResult`, which embeds it) can be printed in diagnostics and copied.
#[derive(Debug, Clone)]
pub struct ImageData {
    /// An optional alpha channel of the image.
    pub alpha: Option<Vec<u8>>,
    /// The color space of the image.
    pub color_space: ImageColorSpace,
    /// The bits per component of the image.
    pub bits_per_component: u8,
}

/// The result of applying a filter.
#[derive(Debug, Clone)]
pub struct FilterResult {
    /// The decoded data.
    pub data: Vec<u8>,
    /// Additional data that is extracted from JPX image streams.
    pub image_data: Option<ImageData>,
}

impl FilterResult {
    /// Wrap plain decoded bytes that carry no image metadata.
    pub(crate) fn from_data(data: Vec<u8>) -> Self {
        Self {
            data,
            image_data: None,
        }
    }
}
191
192fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
193    let length = dict.get::<u32>(LENGTH)?;
194
195    r.skip_white_spaces_and_comments();
196    r.forward_tag(b"stream")?;
197    r.forward_tag(b"\n")
198        .or_else(|| r.forward_tag(b"\r\n"))
199        .or_else(|| r.forward_tag(b"\r"))?;
200    let data = r.read_bytes(length as usize)?;
201    r.skip_white_spaces();
202    r.forward_tag(b"endstream")?;
203
204    Some(Stream {
205        data,
206        dict: dict.clone(),
207    })
208}
209
210fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
211    while r.forward_tag(b"stream").is_none() {
212        r.read_byte()?;
213    }
214
215    r.forward_tag(b"\n").or_else(|| r.forward_tag(b"\r\n"))?;
216
217    let data_start = r.tail()?;
218    let start = r.offset();
219
220    loop {
221        if r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some() {
222            let length = r.offset() - start;
223            let data = data_start.get(..length)?;
224
225            r.skip_white_spaces();
226
227            // This was just a whitespace in the data stream but not actually marking the end
228            // of the stream, so continue searching.
229            if r.forward_tag(b"endstream").is_none() {
230                continue;
231            }
232
233            let stream = Stream {
234                data,
235                dict: dict.clone(),
236            };
237
238            // Try decoding the stream to see if it is valid.
239            if stream.decoded().is_ok() {
240                info!("managed to reconstruct the stream");
241
242                // Seems like we found the end!
243                return Some(stream);
244            }
245        } else {
246            r.read_byte()?;
247        }
248    }
249}
250
251impl<'a> TryFrom<Object<'a>> for Stream<'a> {
252    type Error = ();
253
254    fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
255        match value {
256            Object::Stream(s) => Ok(s),
257            _ => Err(()),
258        }
259    }
260}
261
262impl<'a> ObjectLike<'a> for Stream<'a> {}
263
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::{Reader, ReaderContext};

    /// A stream with a correct `Length` entry yields exactly its data bytes.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let mut reader = Reader::new(input);

        let parsed = reader.read_with_context::<Stream>(ReaderContext::dummy());
        let stream = parsed.unwrap();

        assert_eq!(stream.data, b"abcdefghij");
    }
}