hayro_syntax/object/
stream.rs1use crate::crypto::DecryptionTarget;
4use crate::filter::Filter;
5use crate::object;
6use crate::object::Dict;
7use crate::object::Name;
8use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
9use crate::object::{Array, ObjectIdentifier};
10use crate::object::{Object, ObjectLike};
11use crate::reader::Reader;
12use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
13use crate::util::OptionLog;
14use log::warn;
15use smallvec::SmallVec;
16use std::borrow::Cow;
17use std::fmt::{Debug, Formatter};
18
19#[derive(Clone)]
21pub struct Stream<'a> {
22 dict: Dict<'a>,
23 filters: SmallVec<[Filter; 2]>,
24 filter_params: SmallVec<[Dict<'a>; 2]>,
25 data: &'a [u8],
26}
27
28impl PartialEq for Stream<'_> {
29 fn eq(&self, other: &Self) -> bool {
30 self.dict == other.dict && self.data == other.data
31 }
32}
33
/// Extra, image-specific inputs that are handed to the filters when a stream
/// is decoded as an image.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names; the filters that consume them are not visible in this file — confirm.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ImageDecodeParams {
    /// Presumably whether the image uses an indexed color space.
    pub is_indexed: bool,
    /// Presumably the number of bits per component, when known.
    pub bpc: Option<u8>,
    /// Presumably the number of color components, when known.
    pub num_components: Option<u8>,
    /// Presumably a requested `(width, height)` for the decoded image, if any.
    pub target_dimension: Option<(u32, u32)>,
    /// Presumably the width of the image.
    pub width: u32,
    /// Presumably the height of the image.
    pub height: u32,
}
52
53impl<'a> Stream<'a> {
54 pub(crate) fn new(data: &'a [u8], dict: Dict<'a>) -> Self {
55 let mut collected_filters = SmallVec::new();
56 let mut collected_params = SmallVec::new();
57
58 if let Some(filter) = dict
59 .get::<Name<'_>>(F)
60 .or_else(|| dict.get::<Name<'_>>(FILTER))
61 .and_then(|n| Filter::from_name(n))
62 {
63 let params = dict
64 .get::<Dict<'_>>(DP)
65 .or_else(|| dict.get::<Dict<'_>>(DECODE_PARMS))
66 .unwrap_or_default();
67
68 collected_filters.push(filter);
69 collected_params.push(params);
70 } else if let Some(filters) = dict
71 .get::<Array<'_>>(F)
72 .or_else(|| dict.get::<Array<'_>>(FILTER))
73 {
74 let filters = filters.iter::<Name<'_>>().map(|n| Filter::from_name(n));
75 let mut params = dict
76 .get::<Array<'_>>(DP)
77 .or_else(|| dict.get::<Array<'_>>(DECODE_PARMS))
78 .map(|a| a.iter::<Object<'_>>());
79
80 for filter in filters {
81 let params = params
82 .as_mut()
83 .and_then(|p| p.next())
84 .and_then(|p| p.into_dict())
85 .unwrap_or_default();
86
87 if let Some(filter) = filter {
88 collected_filters.push(filter);
89 collected_params.push(params);
90 }
91 }
92 }
93
94 Self {
95 dict,
96 filters: collected_filters,
97 filter_params: collected_params,
98 data,
99 }
100 }
101
102 pub fn raw_data(&self) -> Cow<'a, [u8]> {
106 let ctx = self.dict.ctx();
107
108 if ctx.xref.needs_decryption(ctx)
109 && self
110 .dict
111 .get::<object::String<'_>>(TYPE)
112 .map(|t| t.get().as_ref() != b"XRef")
113 .unwrap_or(true)
114 {
115 Cow::Owned(
116 ctx.xref
117 .decrypt(
118 self.dict.obj_id().unwrap(),
119 self.data,
120 DecryptionTarget::Stream,
121 )
122 .unwrap_or_default(),
124 )
125 } else {
126 Cow::Borrowed(self.data)
127 }
128 }
129
130 pub fn dict(&self) -> &Dict<'a> {
132 &self.dict
133 }
134
135 pub fn obj_id(&self) -> ObjectIdentifier {
137 self.dict.obj_id().unwrap()
138 }
139
140 pub fn filters(&self) -> &[Filter] {
142 &self.filters
143 }
144
145 pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
150 self.decoded_image(&ImageDecodeParams::default())
151 .map(|r| r.data)
152 }
153
154 pub fn decoded_image(
157 &self,
158 image_params: &ImageDecodeParams,
159 ) -> Result<FilterResult, DecodeFailure> {
160 let data = self.raw_data();
161
162 let mut current: Option<FilterResult> = None;
163
164 for (filter, params) in self.filters.iter().zip(self.filter_params.iter()) {
165 let new = filter.apply(
166 current.as_ref().map(|c| c.data.as_ref()).unwrap_or(&data),
167 params.clone(),
168 image_params,
169 )?;
170 current = Some(new);
171 }
172
173 Ok(current.unwrap_or(FilterResult {
174 data: data.to_vec(),
175 image_data: None,
176 }))
177 }
178}
179
180impl Debug for Stream<'_> {
181 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
182 write!(f, "Stream (len: {:?})", self.data.len())
183 }
184}
185
186impl Skippable for Stream<'_> {
187 fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
188 warn!("attempted to skip a stream object");
190
191 None
192 }
193}
194
195impl<'a> Readable<'a> for Stream<'a> {
196 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
197 let dict = r.read_with_context::<Dict<'_>>(ctx)?;
198
199 if dict.contains_key(F) {
200 warn!("encountered stream referencing external file, which is unsupported");
201
202 return None;
203 }
204
205 let offset = r.offset();
206 parse_proper(r, &dict)
207 .or_else(|| {
208 warn!("failed to parse stream, trying to parse it manually");
209
210 r.jump(offset);
211 parse_fallback(r, &dict)
212 })
213 .error_none("was unable to manually parse the stream")
214 }
215}
216
/// The reason why decoding a stream failed.
///
/// NOTE(review): the variants are produced by the filter implementations,
/// which are not visible in this file; the descriptions follow the variant
/// names — confirm.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DecodeFailure {
    /// Decoding image data failed.
    ImageDecode,
    /// Decoding stream data failed.
    StreamDecode,
    /// Decrypting the data failed.
    Decryption,
    /// The failure could not be attributed to a more specific cause.
    Unknown,
}

impl std::fmt::Display for DecodeFailure {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::ImageDecode => "failed to decode image data",
            Self::StreamDecode => "failed to decode stream data",
            Self::Decryption => "failed to decrypt stream",
            Self::Unknown => "unknown stream decoding failure",
        };

        f.write_str(message)
    }
}

// Lets callers propagate the failure through `Box<dyn Error>` and similar.
impl std::error::Error for DecodeFailure {}
229
/// The color space of decoded image data.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ImageColorSpace {
    /// Grayscale.
    Gray,
    /// RGB.
    Rgb,
    /// CMYK.
    Cmyk,
    /// A color space that is none of the above.
    ///
    /// NOTE(review): the meaning of the `u8` payload is not visible in this
    /// file (possibly a component count) — confirm.
    Unknown(u8),
}
242
243pub struct ImageData {
245 pub alpha: Option<Vec<u8>>,
247 pub color_space: Option<ImageColorSpace>,
249 pub bits_per_component: u8,
251 pub width: u32,
253 pub height: u32,
255}
256
257pub struct FilterResult {
259 pub data: Vec<u8>,
261 pub image_data: Option<ImageData>,
263}
264
265impl FilterResult {
266 pub(crate) fn from_data(data: Vec<u8>) -> Self {
267 Self {
268 data,
269 image_data: None,
270 }
271 }
272}
273
274fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
275 let length = dict.get::<u32>(LENGTH)?;
276
277 r.skip_white_spaces_and_comments();
278 r.forward_tag(b"stream")?;
279 r.forward_tag(b"\n")
280 .or_else(|| r.forward_tag(b"\r\n"))
281 .or_else(|| r.forward_tag(b"\r"))?;
282 let data = r.read_bytes(length as usize)?;
283 r.skip_white_spaces();
284 r.forward_tag(b"endstream")?;
285
286 Some(Stream::new(data, dict.clone()))
287}
288
289fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
290 while r.forward_tag(b"stream").is_none() {
291 r.read_byte()?;
292 }
293
294 r.forward_tag(b"\n")
295 .or_else(|| r.forward_tag(b"\r\n"))
296 .or_else(|| r.forward_tag(b"\r"))?;
298
299 let data_start = r.tail()?;
300 let start = r.offset();
301
302 loop {
303 if r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some() {
304 let length = r.offset() - start;
305 let data = data_start.get(..length)?;
306
307 r.skip_white_spaces();
308
309 if r.forward_tag(b"endstream").is_none() {
312 continue;
313 }
314
315 let stream = Stream::new(data, dict.clone());
316
317 return Some(stream);
319 } else {
320 r.read_byte()?;
321 }
322 }
323}
324
325impl<'a> TryFrom<Object<'a>> for Stream<'a> {
326 type Error = ();
327
328 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
329 match value {
330 Object::Stream(s) => Ok(s),
331 _ => Err(()),
332 }
333 }
334}
335
336impl<'a> ObjectLike<'a> for Stream<'a> {}
337
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    /// A stream whose declared length matches its data is read as-is.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let mut reader = Reader::new(input);
        let ctx = ReaderContext::dummy();

        let parsed = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        assert_eq!(parsed.data, b"abcdefghij");
    }
}