hayro_syntax/object/
stream.rs1use crate::crypto::DecryptionTarget;
4use crate::filter::Filter;
5use crate::object;
6use crate::object::Dict;
7use crate::object::Name;
8use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
9use crate::object::{Array, ObjectIdentifier};
10use crate::object::{Object, ObjectLike};
11use crate::reader::{Readable, Reader, ReaderContext, Skippable};
12use crate::util::OptionLog;
13use log::{info, warn};
14use std::borrow::Cow;
15use std::fmt::{Debug, Formatter};
16
/// A stream object: a dictionary together with a borrowed slice of bytes.
///
/// The bytes are stored exactly as they were handed to the constructor; they
/// are only decrypted and run through filters on demand by the accessor
/// methods of this type.
#[derive(Clone, PartialEq)]
pub struct Stream<'a> {
    /// The dictionary that describes the stream (length, filters, ...).
    dict: Dict<'a>,
    /// The stored stream bytes, before decryption and before any filter is applied.
    data: &'a [u8],
}
23
/// Extra parameters that are forwarded to every filter when decoding a stream.
///
/// The `Default` value (`is_indexed == false`, `bpc == None`) is what
/// `Stream::decoded` uses.
#[derive(Clone, PartialEq, Default)]
pub struct ImageDecodeParams {
    /// NOTE(review): presumably whether the image uses an indexed color
    /// space — confirm against the filter implementations.
    pub is_indexed: bool,
    /// NOTE(review): presumably the bits per component of the image, if
    /// known — confirm against the filter implementations.
    pub bpc: Option<u8>,
}
32
33impl<'a> Stream<'a> {
34 pub fn raw_data(&self) -> Cow<'a, [u8]> {
38 let ctx = self.dict.ctx();
39
40 if ctx.xref.needs_decryption(ctx)
41 && self
42 .dict
43 .get::<object::String>(TYPE)
44 .map(|t| t.get().as_ref() != b"XRef")
45 .unwrap_or(true)
46 {
47 Cow::Owned(
48 ctx.xref
49 .decrypt(
50 self.dict.obj_id().unwrap(),
51 self.data,
52 DecryptionTarget::Stream,
53 )
54 .unwrap_or_default(),
56 )
57 } else {
58 Cow::Borrowed(self.data)
59 }
60 }
61
62 pub fn dict(&self) -> &Dict<'a> {
64 &self.dict
65 }
66
67 pub fn obj_id(&self) -> ObjectIdentifier {
69 self.dict.obj_id().unwrap()
70 }
71
72 pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
77 self.decoded_image(&ImageDecodeParams::default())
78 .map(|r| r.data)
79 }
80
81 pub fn decoded_image(
84 &self,
85 image_params: &ImageDecodeParams,
86 ) -> Result<FilterResult, DecodeFailure> {
87 let data = self.raw_data();
88
89 if let Some(filter) = self
90 .dict
91 .get::<Name>(F)
92 .or_else(|| self.dict.get::<Name>(FILTER))
93 .and_then(|n| Filter::from_name(n))
94 {
95 let params = self
96 .dict
97 .get::<Dict>(DP)
98 .or_else(|| self.dict.get::<Dict>(DECODE_PARMS));
99
100 filter.apply(&data, params.clone().unwrap_or_default(), image_params)
101 } else if let Some(filters) = self
102 .dict
103 .get::<Array>(F)
104 .or_else(|| self.dict.get::<Array>(FILTER))
105 {
106 let filters = filters
107 .iter::<Name>()
108 .map(|n| Filter::from_name(n))
109 .collect::<Option<Vec<_>>>()
110 .ok_or(DecodeFailure::Unknown)?;
111 let params: Vec<_> = self
112 .dict
113 .get::<Array>(DP)
114 .or_else(|| self.dict.get::<Array>(DECODE_PARMS))
115 .map(|a| a.iter::<Object>().collect())
116 .unwrap_or_default();
117
118 let mut current: Option<FilterResult> = None;
119
120 for (i, filter) in filters.iter().enumerate() {
121 let params = params.get(i).and_then(|p| p.clone().cast::<Dict>());
122
123 let new = filter.apply(
124 current.as_ref().map(|c| c.data.as_ref()).unwrap_or(&data),
125 params.clone().unwrap_or_default(),
126 image_params,
127 )?;
128 current = Some(new);
129 }
130
131 Ok(current.unwrap_or(FilterResult {
132 data: data.to_vec(),
133 image_data: None,
134 }))
135 } else {
136 Ok(FilterResult {
137 data: data.to_vec(),
138 image_data: None,
139 })
140 }
141 }
142
143 pub(crate) fn from_raw(data: &'a [u8], dict: Dict<'a>) -> Self {
144 Self { dict, data }
145 }
146}
147
148impl Debug for Stream<'_> {
149 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
150 write!(f, "Stream (len: {:?})", self.data.len())
151 }
152}
153
154impl Skippable for Stream<'_> {
155 fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
156 warn!("attempted to skip a stream object");
158
159 None
160 }
161}
162
163impl<'a> Readable<'a> for Stream<'a> {
164 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
165 let dict = r.read_with_context::<Dict>(ctx)?;
166
167 if dict.contains_key(F) {
168 warn!("encountered stream referencing external file, which is unsupported");
169
170 return None;
171 }
172
173 let offset = r.offset();
174 parse_proper(r, &dict)
175 .or_else(|| {
176 warn!("failed to parse stream, trying to parse it manually");
177
178 r.jump(offset);
179 parse_fallback(r, &dict)
180 })
181 .error_none("was unable to manually parse the stream")
182 }
183}
184
/// The reasons for which decoding a stream can fail.
///
/// NOTE(review): apart from `Unknown`, the variants are produced outside of
/// this file; their descriptions below are inferred from the names only.
#[derive(Debug, Copy, Clone)]
pub enum DecodeFailure {
    /// Presumably: an image stream could not be decoded.
    ImageDecode,
    /// Presumably: a (non-image) stream could not be decoded.
    StreamDecode,
    /// Presumably: a failure related to a JPX image.
    JpxImage,
    /// Presumably: a failure related to decryption.
    Decryption,
    /// Any other failure; `Stream::decoded_image` returns this when an entry
    /// of a filter array is not a recognised filter.
    Unknown,
}
199
/// The color space reported for decoded image data (see `ImageData`).
#[derive(Debug, Copy, Clone)]
pub enum ImageColorSpace {
    /// Grayscale.
    Gray,
    /// RGB.
    Rgb,
    /// CMYK.
    Cmyk,
}
210
/// Image metadata that can accompany the bytes of a `FilterResult`.
pub struct ImageData {
    /// Optional alpha data.
    /// NOTE(review): layout and sample size are not visible here — confirm
    /// against the filter that produces it.
    pub alpha: Option<Vec<u8>>,
    /// The color space of the image data.
    pub color_space: ImageColorSpace,
    /// The number of bits per color component.
    pub bits_per_component: u8,
}
220
/// The outcome of decoding a stream.
pub struct FilterResult {
    /// The decoded bytes.
    pub data: Vec<u8>,
    /// Additional image metadata; `None` when the stream was returned
    /// without any filter being applied.
    pub image_data: Option<ImageData>,
}
228
229impl FilterResult {
230 pub(crate) fn from_data(data: Vec<u8>) -> Self {
231 Self {
232 data,
233 image_data: None,
234 }
235 }
236}
237
238fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
239 let length = dict.get::<u32>(LENGTH)?;
240
241 r.skip_white_spaces_and_comments();
242 r.forward_tag(b"stream")?;
243 r.forward_tag(b"\n")
244 .or_else(|| r.forward_tag(b"\r\n"))
245 .or_else(|| r.forward_tag(b"\r"))?;
246 let data = r.read_bytes(length as usize)?;
247 r.skip_white_spaces();
248 r.forward_tag(b"endstream")?;
249
250 Some(Stream {
251 data,
252 dict: dict.clone(),
253 })
254}
255
256fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
257 while r.forward_tag(b"stream").is_none() {
258 r.read_byte()?;
259 }
260
261 r.forward_tag(b"\n")
262 .or_else(|| r.forward_tag(b"\r\n"))
263 .or_else(|| r.forward_tag(b"\r"))?;
265
266 let data_start = r.tail()?;
267 let start = r.offset();
268
269 loop {
270 if r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some() {
271 let length = r.offset() - start;
272 let data = data_start.get(..length)?;
273
274 r.skip_white_spaces();
275
276 if r.forward_tag(b"endstream").is_none() {
279 continue;
280 }
281
282 let stream = Stream {
283 data,
284 dict: dict.clone(),
285 };
286
287 if stream.decoded().is_ok() {
289 info!("managed to reconstruct the stream");
290
291 return Some(stream);
293 }
294 } else {
295 r.read_byte()?;
296 }
297 }
298}
299
300impl<'a> TryFrom<Object<'a>> for Stream<'a> {
301 type Error = ();
302
303 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
304 match value {
305 Object::Stream(s) => Ok(s),
306 _ => Err(()),
307 }
308 }
309}
310
// Marker implementation with an empty body; `Stream` adds nothing of its own
// to `ObjectLike`.
impl<'a> ObjectLike<'a> for Stream<'a> {}
312
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::{Reader, ReaderContext};

    /// A stream whose declared length matches its payload is read verbatim.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let ctx = ReaderContext::dummy();

        let mut reader = Reader::new(input);
        let parsed = reader.read_with_context::<Stream>(&ctx).unwrap();

        assert_eq!(parsed.data, b"abcdefghij");
    }
}