hayro_syntax/object/
stream.rs

1//! Stream objects.
2
3use crate::filter::{Filter, FilterResult};
4use crate::object::array::Array;
5use crate::object::dict::Dict;
6use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH};
7use crate::object::name::Name;
8use crate::object::{Object, ObjectLike};
9use crate::reader::{Readable, Reader, Skippable};
10use crate::util::OptionLog;
11use crate::xref::XRef;
12use log::warn;
13use std::fmt::{Debug, Formatter};
14
15/// A stream of arbitrary data.
16#[derive(Clone, PartialEq)]
17pub struct Stream<'a> {
18    dict: Dict<'a>,
19    data: &'a [u8],
20}
21
22impl<'a> Stream<'a> {
23    /// Return the raw (potentially with some applied filters) data of the stream.
24    pub fn raw_data(&self) -> &'a [u8] {
25        self.data
26    }
27
28    /// Return the raw (potentially with some applied filters) data of the stream.
29    pub fn dict(&self) -> &Dict<'a> {
30        &self.dict
31    }
32
33    /// Return the decoded data of the stream.
34    ///
35    /// Note that the result of this method will not be cached, so calling it multiple
36    /// times is expensive.
37    pub fn decoded(&self) -> Option<Vec<u8>> {
38        self.decoded_image().map(|r| r.data)
39    }
40
41    /// Return the decoded data of the stream, and return image metadata in case
42    /// the data stream is a JPX stream.
43    pub fn decoded_image(&self) -> Option<FilterResult> {
44        if let Some(filter) = self
45            .dict
46            .get::<Name>(F)
47            .or_else(|| self.dict.get::<Name>(FILTER))
48            .and_then(|n| Filter::from_name(&n))
49        {
50            let params = self
51                .dict
52                .get::<Dict>(DP)
53                .or_else(|| self.dict.get::<Dict>(DECODE_PARMS));
54
55            filter.apply(self.data, params.clone().unwrap_or_default())
56        } else if let Some(filters) = self
57            .dict
58            .get::<Array>(F)
59            .or_else(|| self.dict.get::<Array>(FILTER))
60        {
61            let filters = filters
62                .iter::<Name>()
63                .map(|n| Filter::from_name(&n))
64                .collect::<Option<Vec<_>>>()?;
65            let params = self
66                .dict
67                .get::<Array>(DP)
68                .or_else(|| self.dict.get::<Array>(DECODE_PARMS))
69                .map(|a| a.iter::<Object>().collect())
70                .unwrap_or(vec![]);
71
72            let mut current: Option<FilterResult> = None;
73
74            for i in 0..filters.len() {
75                let params = params.get(i).and_then(|p| p.clone().cast::<Dict>());
76
77                let new = filters[i].apply(
78                    current
79                        .as_ref()
80                        .map(|c| c.data.as_ref())
81                        .unwrap_or(self.data),
82                    params.clone().unwrap_or_default(),
83                )?;
84                current = Some(new);
85            }
86
87            Some(current.unwrap_or(FilterResult {
88                data: self.data.to_vec(),
89                color_space: None,
90                bits_per_component: None,
91            }))
92        } else {
93            Some(FilterResult {
94                data: self.data.to_vec(),
95                color_space: None,
96                bits_per_component: None,
97            })
98        }
99    }
100
101    pub(crate) fn from_raw(data: &'a [u8], dict: Dict<'a>) -> Self {
102        Self { dict, data }
103    }
104}
105
106impl Debug for Stream<'_> {
107    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
108        write!(f, "Stream (len: {:?})", self.data.len())
109    }
110}
111
112impl Skippable for Stream<'_> {
113    fn skip<const PLAIN: bool>(_: &mut Reader<'_>) -> Option<()> {
114        // A stream can never appear in a dict/array, so it should never be skipped.
115        warn!("attempted to skip a stream object");
116
117        None
118    }
119}
120
121impl<'a> Readable<'a> for Stream<'a> {
122    fn read<const PLAIN: bool>(r: &mut Reader<'a>, xref: &'a XRef) -> Option<Self> {
123        let dict = r.read_with_xref::<Dict>(xref)?;
124
125        if dict.contains_key(F) {
126            warn!("encountered stream referencing external file, which is unsupported");
127
128            return None;
129        }
130
131        let offset = r.offset();
132        parse_proper(r, &dict)
133            .or_else(|| {
134                warn!("failed to parse stream, trying to parse it manually");
135
136                r.jump(offset);
137                parse_fallback(r, &dict)
138            })
139            .error_none("was unable to manually parse the stream")
140    }
141}
142
143fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
144    let length = dict.get::<u32>(LENGTH)?;
145
146    r.skip_white_spaces_and_comments();
147    r.forward_tag(b"stream")?;
148    r.forward_tag(b"\n").or_else(|| r.forward_tag(b"\r\n"))?;
149    let data = r.read_bytes(length as usize)?;
150    r.skip_white_spaces();
151    r.forward_tag(b"endstream")?;
152
153    Some(Stream {
154        data,
155        dict: dict.clone(),
156    })
157}
158
159fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
160    while r.forward_tag(b"stream").is_none() {
161        r.read_byte()?;
162    }
163
164    r.forward_tag(b"\n").or_else(|| r.forward_tag(b"\r\n"))?;
165
166    let data_start = r.tail()?;
167    let start = r.offset();
168
169    loop {
170        if r.peek_byte()?.is_ascii_whitespace() {
171            let length = r.offset() - start;
172            let data = data_start.get(..length)?;
173
174            r.skip_white_spaces();
175
176            // This was just a whitespace in the data stream but not actually marking the end
177            // of the stream, so continue searching.
178            if r.forward_tag(b"endstream").is_none() {
179                continue;
180            }
181
182            let stream = Stream {
183                data,
184                dict: dict.clone(),
185            };
186
187            // Try decoding the stream to see if it is valid.
188            if stream.decoded().is_some() {
189                warn!("managed to reconstruct the stream");
190
191                // Seems like we found the end!
192                return Some(stream);
193            }
194        } else {
195            r.read_byte()?;
196        }
197    }
198}
199
200impl<'a> TryFrom<Object<'a>> for Stream<'a> {
201    type Error = ();
202
203    fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
204        match value {
205            Object::Stream(s) => Ok(s),
206            _ => Err(()),
207        }
208    }
209}
210
211impl<'a> ObjectLike<'a> for Stream<'a> {}
212
#[cfg(test)]
mod tests {
    use crate::object::stream::Stream;
    use crate::reader::Reader;
    use crate::xref::XRef;

    /// A well-formed stream whose declared length matches its body yields
    /// exactly the bytes between the line break and `endstream`.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let mut reader = Reader::new(input);

        let parsed = reader.read_with_xref::<Stream>(&XRef::dummy()).unwrap();

        assert_eq!(parsed.data, b"abcdefghij");
    }
}
227}