pdf_syntax/object/
stream.rs1use crate::crypto::DecryptionTarget;
4use crate::filter::Filter;
5use crate::object;
6use crate::object::Dict;
7use crate::object::Name;
8use crate::object::dict::keys::{DECODE_PARMS, DP, F, FILTER, LENGTH, TYPE};
9use crate::object::{Array, ObjectIdentifier};
10use crate::object::{Object, ObjectLike};
11use crate::reader::Reader;
12use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
13use crate::sync::Arc;
14use crate::util::OptionLog;
15use alloc::borrow::Cow;
16use alloc::vec::Vec;
17use core::fmt::{Debug, Formatter};
18use log::warn;
19use smallvec::SmallVec;
20
21#[derive(Clone)]
22struct StreamInner<'a> {
23 dict: Dict<'a>,
24 filters: SmallVec<[Filter; 2]>,
25 filter_params: SmallVec<[Dict<'a>; 2]>,
26 data: &'a [u8],
27}
28
29#[derive(Clone)]
31pub struct Stream<'a>(Arc<StreamInner<'a>>);
32
33impl PartialEq for Stream<'_> {
34 fn eq(&self, other: &Self) -> bool {
35 self.0.dict == other.0.dict && self.0.data == other.0.data
36 }
37}
38
/// Additional parameters handed to the filters when a stream is decoded as an image.
///
/// The [`Default`] value (everything `false`/`None`/`0`) is what [`Stream::decoded`] uses
/// for non-image streams.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ImageDecodeParams {
    /// Whether the image uses an indexed color space (presumably — confirm with the filters
    /// that consume this flag).
    pub is_indexed: bool,
    /// Bits per component, if known.
    pub bpc: Option<u8>,
    /// Number of color components, if known.
    pub num_components: Option<u8>,
    /// An optional `(width, height)` hint for the size the image should be decoded at.
    // NOTE(review): how (and whether) a filter honors this hint is not visible in this file.
    pub target_dimension: Option<(u32, u32)>,
    /// The image width.
    pub width: u32,
    /// The image height.
    pub height: u32,
}
57
58impl<'a> Stream<'a> {
59 pub(crate) fn new(data: &'a [u8], dict: Dict<'a>) -> Self {
60 let mut collected_filters = SmallVec::new();
61 let mut collected_params = SmallVec::new();
62
63 if let Some(filter) = dict
64 .get::<Name>(F)
65 .or_else(|| dict.get::<Name>(FILTER))
66 .and_then(Filter::from_name)
67 {
68 let params = dict
69 .get::<Dict<'_>>(DP)
70 .or_else(|| dict.get::<Dict<'_>>(DECODE_PARMS))
71 .unwrap_or_default();
72
73 collected_filters.push(filter);
74 collected_params.push(params);
75 } else if let Some(filters) = dict
76 .get::<Array<'_>>(F)
77 .or_else(|| dict.get::<Array<'_>>(FILTER))
78 {
79 let filters = filters.iter::<Name>().map(Filter::from_name);
80 let mut params = dict
81 .get::<Array<'_>>(DP)
82 .or_else(|| dict.get::<Array<'_>>(DECODE_PARMS))
83 .map(|a| a.iter::<Object<'_>>());
84
85 for filter in filters {
86 let params = params
87 .as_mut()
88 .and_then(|p| p.next())
89 .and_then(|p| p.into_dict())
90 .unwrap_or_default();
91
92 if let Some(filter) = filter {
93 collected_filters.push(filter);
94 collected_params.push(params);
95 }
96 }
97 }
98
99 Self(Arc::new(StreamInner {
100 dict,
101 filters: collected_filters,
102 filter_params: collected_params,
103 data,
104 }))
105 }
106
107 pub fn raw_data(&self) -> Cow<'a, [u8]> {
111 let ctx = self.0.dict.ctx();
112
113 if ctx.xref().needs_decryption(ctx)
114 && self
115 .0
116 .dict
117 .get::<object::String>(TYPE)
118 .map(|t| t.as_ref() != b"XRef")
119 .unwrap_or(true)
120 {
121 if let Some(obj_id) = self.0.dict.obj_id() {
124 Cow::Owned(
125 ctx.xref()
126 .decrypt(obj_id, self.0.data, DecryptionTarget::Stream)
127 .unwrap_or_default(),
128 )
129 } else {
130 Cow::Borrowed(self.0.data)
131 }
132 } else {
133 Cow::Borrowed(self.0.data)
134 }
135 }
136
137 pub fn dict(&self) -> &Dict<'a> {
139 &self.0.dict
140 }
141
142 pub fn obj_id(&self) -> Option<ObjectIdentifier> {
146 self.0.dict.obj_id()
147 }
148
149 pub fn filters(&self) -> &[Filter] {
151 &self.0.filters
152 }
153
154 pub fn decoded(&self) -> Result<Vec<u8>, DecodeFailure> {
159 self.decoded_image(&ImageDecodeParams::default())
160 .map(|r| r.data)
161 }
162
163 pub fn decoded_image(
166 &self,
167 image_params: &ImageDecodeParams,
168 ) -> Result<FilterResult, DecodeFailure> {
169 let data = self.raw_data();
170
171 let mut current: Option<FilterResult> = None;
172
173 for (filter, params) in self.0.filters.iter().zip(self.0.filter_params.iter()) {
174 let new = filter.apply(
175 current.as_ref().map(|c| c.data.as_ref()).unwrap_or(&data),
176 params.clone(),
177 image_params,
178 )?;
179 current = Some(new);
180 }
181
182 Ok(current.unwrap_or(FilterResult {
183 data: data.to_vec(),
184 image_data: None,
185 }))
186 }
187}
188
189impl Debug for Stream<'_> {
190 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
191 write!(f, "Stream (len: {:?})", self.0.data.len())
192 }
193}
194
195impl Skippable for Stream<'_> {
196 fn skip(_: &mut Reader<'_>, _: bool) -> Option<()> {
197 warn!("attempted to skip a stream object");
199
200 None
201 }
202}
203
204impl<'a> Readable<'a> for Stream<'a> {
205 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
206 let dict = r.read_with_context::<Dict<'_>>(ctx)?;
207
208 if dict.contains_key(F) {
209 warn!("encountered stream referencing external file, which is unsupported");
210
211 return None;
212 }
213
214 let offset = r.offset();
215 parse_proper(r, &dict)
216 .or_else(|| {
217 warn!("failed to parse stream, trying to parse it manually");
218
219 r.jump(offset);
220 parse_fallback(r, &dict)
221 })
222 .error_none("was unable to manually parse the stream")
223 }
224}
225
/// The reason why decoding a stream failed.
///
/// Implements [`PartialEq`]/[`Eq`] so that decoding results can be compared directly, e.g.
/// `assert_eq!(stream.decoded(), Err(DecodeFailure::StreamDecode))`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DecodeFailure {
    /// Decoding an image stream failed.
    ImageDecode,
    /// Decoding a (non-image) data stream failed.
    StreamDecode,
    /// Decrypting the stream failed.
    Decryption,
    /// The failure could not be classified further.
    Unknown,
}
238
/// The color space reported for a decoded image.
///
/// Implements [`PartialEq`]/[`Eq`] so that callers can compare color spaces directly
/// instead of having to pattern-match.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ImageColorSpace {
    /// Grayscale.
    Gray,
    /// RGB.
    Rgb,
    /// RGB data that originated from YCbCr.
    // NOTE(review): the exact circumstances under which a decoder reports this variant
    // instead of `Rgb` are not visible in this file.
    RgbFromYCbCr,
    /// CMYK.
    Cmyk,
    /// A color space that could not be identified.
    // NOTE(review): the payload is presumably the number of components — confirm against
    // the decoder that constructs this variant.
    Unknown(u8),
}
261
262pub struct ImageData {
264 pub alpha: Option<Vec<u8>>,
266 pub color_space: Option<ImageColorSpace>,
268 pub bits_per_component: u8,
270 pub width: u32,
272 pub height: u32,
274}
275
276pub struct FilterResult {
278 pub data: Vec<u8>,
280 pub image_data: Option<ImageData>,
282}
283
284impl FilterResult {
285 pub(crate) fn from_data(data: Vec<u8>) -> Self {
286 Self {
287 data,
288 image_data: None,
289 }
290 }
291}
292
293fn parse_proper<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
294 let length = dict.get::<u32>(LENGTH)?;
295
296 r.skip_white_spaces_and_comments();
297 r.forward_tag(b"stream")?;
298 while r.peek_byte().is_some_and(|b| b == b' ' || b == b'\t') {
302 r.forward();
303 }
304 r.forward_tag(b"\n")
305 .or_else(|| r.forward_tag(b"\r\n"))
306 .or_else(|| r.forward_tag(b"\r"))?;
307 let data = r.read_bytes(length as usize)?;
308 r.skip_white_spaces();
309 r.forward_tag(b"endstream")?;
310
311 Some(Stream::new(data, dict.clone()))
312}
313
314fn parse_fallback<'a>(r: &mut Reader<'a>, dict: &Dict<'a>) -> Option<Stream<'a>> {
315 while r.forward_tag(b"stream").is_none() {
316 r.read_byte()?;
317 }
318
319 while r.peek_byte().is_some_and(|b| b == b' ' || b == b'\t') {
322 r.forward();
323 }
324 r.forward_tag(b"\n")
325 .or_else(|| r.forward_tag(b"\r\n"))
326 .or_else(|| r.forward_tag(b"\r"))?;
328
329 let data_start = r.tail()?;
330 let start = r.offset();
331
332 loop {
333 if r.peek_byte()?.is_ascii_whitespace() || r.peek_tag(b"endstream").is_some() {
334 let length = r.offset() - start;
335 let data = data_start.get(..length)?;
336
337 r.skip_white_spaces();
338
339 if r.forward_tag(b"endstream").is_none() {
342 continue;
343 }
344
345 let stream = Stream::new(data, dict.clone());
346
347 return Some(stream);
349 } else {
350 r.read_byte()?;
351 }
352 }
353}
354
355impl<'a> TryFrom<Object<'a>> for Stream<'a> {
356 type Error = ();
357
358 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
359 match value {
360 Object::Stream(s) => Ok(s),
361 _ => Err(()),
362 }
363 }
364}
365
366impl<'a> ObjectLike<'a> for Stream<'a> {}
367
#[cfg(test)]
mod tests {
    use crate::object::Stream;
    use crate::reader::Reader;
    use crate::reader::{ReaderContext, ReaderExt};

    /// A well-formed stream whose declared length matches its data is read verbatim.
    #[test]
    fn stream() {
        let input = b"<< /Length 10 >> stream\nabcdefghij\nendstream";
        let mut reader = Reader::new(input);
        let ctx = ReaderContext::dummy();

        let parsed = reader.read_with_context::<Stream<'_>>(&ctx).unwrap();

        assert_eq!(parsed.0.data, b"abcdefghij");
    }
}