hayro_syntax/object/
stream.rs

1//! Streams.
2
3use crate::filter::Filter;
4use crate::object::Dict;
5use crate::object::Name;
6use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH};
7use crate::object::{Array, ObjectIdentifier};
8use crate::object::{Object, ObjectLike};
9use crate::reader::{Readable, Reader, ReaderContext, Skippable};
10use crate::util::OptionLog;
11use log::{info, warn};
12use std::fmt::{Debug, Formatter};
13
14/// A stream of arbitrary data.
15#[derive(Clone, PartialEq)]
16pub struct Stream<'a> {
17    dict: Dict<'a>,
18    data: &'a [u8],
19}
20
/// Additional parameters for decoding images.
///
/// The [`Default`] value leaves every flag unset, which is what
/// non-image streams are decoded with.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ImageDecodeParams {
    /// Whether the color space of the image is an indexed color space.
    pub is_indexed: bool,
}
27
28impl<'a> Stream<'a> {
29    /// Return the raw (potentially filtered) data of the stream.
30    pub fn raw_data(&self) -> &'a [u8] {
31        self.data
32    }
33
34    /// Return the raw, underlying dictionary of the stream.
35    pub fn dict(&self) -> &Dict<'a> {
36        &self.dict
37    }
38
39    /// Return the object identifier of the stream.
40    pub fn obj_id(&self) -> ObjectIdentifier {
41        self.dict.obj_id().unwrap()
42    }
43
44    /// Return the decoded data of the stream.
45    ///
46    /// Note that the result of this method will not be cached, so calling it multiple
47    /// times is expensive.
48    pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
49        self.decoded_image(&ImageDecodeParams::default())
50            .map(|r| r.data)
51    }
52
53    /// Return the decoded data of the stream, and return image metadata
54    /// if available.
55    pub fn decoded_image(
56        &self,
57        image_params: &ImageDecodeParams,
58    ) -> Result<FilterResult, DecodeFailure> {
59        if let Some(filter) = self
60            .dict
61            .get::<Name>(F)
62            .or_else(|| self.dict.get::<Name>(FILTER))
63            .and_then(|n| Filter::from_name(n))
64        {
65            let params = self
66                .dict
67                .get::<Dict>(DP)
68                .or_else(|| self.dict.get::<Dict>(DECODE_PARMS));
69
70            filter.apply(self.data, params.clone().unwrap_or_default(), image_params)
71        } else if let Some(filters) = self
72            .dict
73            .get::<Array>(F)
74            .or_else(|| self.dict.get::<Array>(FILTER))
75        {
76            let filters = filters
77                .iter::<Name>()
78                .map(|n| Filter::from_name(n))
79                .collect::<Option<Vec<_>>>()
80                .ok_or(DecodeFailure::Unknown)?;
81            let params: Vec<_> = self
82                .dict
83                .get::<Array>(DP)
84                .or_else(|| self.dict.get::<Array>(DECODE_PARMS))
85                .map(|a| a.iter::<Object>().collect())
86                .unwrap_or_default();
87
88            let mut current: Option<FilterResult> = None;
89
90            for (i, filter) in filters.iter().enumerate() {
91                let params = params.get(i).and_then(|p| p.clone().cast::<Dict>());
92
93                let new = filter.apply(
94                    current
95                        .as_ref()
96                        .map(|c| c.data.as_ref())
97                        .unwrap_or(self.data),
98                    params.clone().unwrap_or_default(),
99                    image_params,
100                )?;
101                current = Some(new);
102            }
103
104            Ok(current.unwrap_or(FilterResult {
105                data: self.data.to_vec(),
106                image_data: None,
107            }))
108        } else {
109            Ok(FilterResult {
110                data: self.data.to_vec(),
111                image_data: None,
112            })
113        }
114    }
115
116    pub(crate) fn from_raw(data: &'a [u8], dict: Dict<'a>) -> Self {
117        Self { dict, data }
118    }
119}
120
121impl Debug for Stream<'_> {
122    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
123        write!(f, "Stream (len: {:?})", self.data.len())
124    }
125}
126
127impl Skippable for Stream<'_> {
128    fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
129        // A stream can never appear in a dict/array, so it should never be skipped.
130        warn!("attempted to skip a stream object");
131
132        None
133    }
134}
135
136impl<'a> Readable<'a> for Stream<'a> {
137    fn read(r: &mut Reader<'a>, ctx: ReaderContext<'a>) -> Option<Self> {
138        let dict = r.read_with_context::<Dict>(ctx)?;
139
140        if dict.contains_key(F) {
141            warn!("encountered stream referencing external file, which is unsupported");
142
143            return None;
144        }
145
146        let offset = r.offset();
147        parse_proper(r, &dict)
148            .or_else(|| {
149                warn!("failed to parse stream, trying to parse it manually");
150
151                r.jump(offset);
152                parse_fallback(r, &dict)
153            })
154            .error_none("was unable to manually parse the stream")
155    }
156}
157
/// A failure that can occur during decoding a data stream.
///
/// The type implements [`std::error::Error`], so it can be propagated with `?`
/// into boxed or wrapping error types.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DecodeFailure {
    /// An image stream failed to decode.
    ImageDecode,
    /// A data stream failed to decode.
    StreamDecode,
    /// A JPEG2000 image was encountered, while the `jpeg2000` feature was disabled.
    JpxImage,
    /// An unknown failure occurred.
    Unknown,
}

impl std::fmt::Display for DecodeFailure {
    /// Write a short, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::ImageDecode => "failed to decode an image stream",
            Self::StreamDecode => "failed to decode a data stream",
            Self::JpxImage => {
                "encountered a JPEG2000 image, but the `jpeg2000` feature is disabled"
            }
            Self::Unknown => "an unknown failure occurred while decoding",
        };

        f.write_str(message)
    }
}

impl std::error::Error for DecodeFailure {}
170
/// An image color space.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ImageColorSpace {
    /// Grayscale color space.
    Gray,
    /// RGB color space.
    Rgb,
    /// CMYK color space.
    Cmyk,
}
181
182/// Additional data that is extracted from some image streams.
183pub struct ImageData {
184    /// An optional alpha channel of the image.
185    pub alpha: Option<Vec<u8>>,
186    /// The color space of the image.
187    pub color_space: ImageColorSpace,
188    /// The bits per component of the image.
189    pub bits_per_component: u8,
190}
191
192/// The result of applying a filter.
193pub struct FilterResult {
194    /// The decoded data.
195    pub data: Vec<u8>,
196    /// Additional data that is extracted from JPX image streams.
197    pub image_data: Option<ImageData>,
198}
199
200impl FilterResult {
201    pub(crate) fn from_data(data: Vec<u8>) -> Self {
202        Self {
203            data,
204            image_data: None,
205        }
206    }
207}
208
209fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
210    let length = dict.get::<u32>(LENGTH)?;
211
212    r.skip_white_spaces_and_comments();
213    r.forward_tag(b"stream")?;
214    r.forward_tag(b"\n")
215        .or_else(|| r.forward_tag(b"\r\n"))
216        .or_else(|| r.forward_tag(b"\r"))?;
217    let data = r.read_bytes(length as usize)?;
218    r.skip_white_spaces();
219    r.forward_tag(b"endstream")?;
220
221    Some(Stream {
222        data,
223        dict: dict.clone(),
224    })
225}
226
227fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
228    while r.forward_tag(b"stream").is_none() {
229        r.read_byte()?;
230    }
231
232    r.forward_tag(b"\n").or_else(|| r.forward_tag(b"\r\n"))?;
233
234    let data_start = r.tail()?;
235    let start = r.offset();
236
237    loop {
238        if r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some() {
239            let length = r.offset() - start;
240            let data = data_start.get(..length)?;
241
242            r.skip_white_spaces();
243
244            // This was just a whitespace in the data stream but not actually marking the end
245            // of the stream, so continue searching.
246            if r.forward_tag(b"endstream").is_none() {
247                continue;
248            }
249
250            let stream = Stream {
251                data,
252                dict: dict.clone(),
253            };
254
255            // Try decoding the stream to see if it is valid.
256            if stream.decoded().is_ok() {
257                info!("managed to reconstruct the stream");
258
259                // Seems like we found the end!
260                return Some(stream);
261            }
262        } else {
263            r.read_byte()?;
264        }
265    }
266}
267
268impl<'a> TryFrom<Object<'a>> for Stream<'a> {
269    type Error = ();
270
271    fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
272        match value {
273            Object::Stream(s) => Ok(s),
274            _ => Err(()),
275        }
276    }
277}
278
279impl<'a> ObjectLike<'a> for Stream<'a> {}
280
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::{Reader, ReaderContext};

    /// A stream whose `/Length` matches its data yields exactly that data.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let mut reader = Reader::new(input);

        let parsed = reader
            .read_with_context::<Stream>(ReaderContext::dummy())
            .unwrap();

        assert_eq!(parsed.raw_data(), b"abcdefghij");
    }
}
296}