pdf/object/
stream.rs

1use datasize::DataSize;
2
3use crate as pdf;
4use crate::object::*;
5use crate::primitive::*;
6use crate::error::*;
7use crate::parser::Lexer;
8use crate::enc::{StreamFilter, decode};
9
10use std::ops::{Deref, Range};
11use std::fmt;
12
/// Backing storage of a stream's bytes.
#[derive(Clone)]
pub (crate) enum StreamData {
    /// Bytes held in memory (shared, cheaply clonable buffer).
    Generated(Arc<[u8]>),
    /// Byte range together with the reference handed to the resolver when the data is fetched
    /// (see `Stream::data`, which forwards both to `Resolve::get_data_or_decode`).
    Original(Range<usize>, PlainRef),
}
// Registers `StreamData` with `datasize` using the fixed value `size_of::<StreamData>()`.
// NOTE(review): this presumably means the bytes behind the `Arc` are not counted per instance — confirm
// against the `datasize` macro documentation.
datasize::non_dynamic_const_heap_size!(StreamData, std::mem::size_of::<StreamData>());
19
/// Simple Stream object with only some additional entries from the stream dict (I).
///
/// The bytes themselves are not exposed directly; use `data()` to obtain them.
#[derive(Clone, DataSize)]
pub struct Stream<I> {
    /// Parsed stream dictionary: the general entries (filters, external file) plus the typed `I`.
    pub info: StreamInfo<I>,
    /// Where the stream's bytes come from: an in-memory buffer or a range resolved on demand.
    pub (crate) inner_data: StreamData,
}
26impl<I: Object> Stream<I> {
27    pub fn from_stream(s: PdfStream, resolve: &impl Resolve) -> Result<Self> {
28        let PdfStream {info, inner} = s;
29        let info = StreamInfo::<I>::from_primitive(Primitive::Dictionary (info), resolve)?;
30        let inner_data = match inner {
31            StreamInner::InFile { id, file_range } => StreamData::Original(file_range, id),
32            StreamInner::Pending { data } => StreamData::Generated(data)
33        };
34        Ok(Stream { info, inner_data })
35    }
36
37    /// the data is not compressed. the specified filters are to be applied when compressing the data
38    pub fn new_with_filters(i: I, data: impl Into<Arc<[u8]>>, filters: Vec<StreamFilter>) -> Stream<I> {
39        Stream {
40            info: StreamInfo {
41                filters,
42                file: None,
43                file_filters: Vec::new(),
44                info: i
45            },
46            inner_data: StreamData::Generated(data.into()),
47        }
48    }
49    pub fn new(i: I, data: impl Into<Arc<[u8]>>) -> Stream<I> {
50        Stream {
51            info: StreamInfo {
52                filters: Vec::new(),
53                file: None,
54                file_filters: Vec::new(),
55                info: i
56            },
57            inner_data: StreamData::Generated(data.into()),
58        }
59    }
60    /// the data is already compressed with the specified filters
61    pub fn from_compressed(i: I, data: impl Into<Arc<[u8]>>, filters: Vec<StreamFilter>) -> Stream<I> {
62        Stream {
63            info: StreamInfo {
64                filters: filters.clone(),
65                file: None,
66                file_filters: Vec::new(),
67                info: i
68            },
69            inner_data: StreamData::Generated(data.into()),
70        }
71    }
72
73    pub fn data(&self, resolve: &impl Resolve) -> Result<Arc<[u8]>> {
74        match self.inner_data {
75            StreamData::Generated(ref data) => {
76                let filters = &self.info.filters;
77                if filters.len() == 0 {
78                    Ok(data.clone())
79                } else {
80                    use std::borrow::Cow;
81                    let mut data: Cow<[u8]> = (&**data).into();
82                    for filter in filters {
83                        data = t!(decode(&data, filter), filter).into();
84                    }
85                    Ok(data.into())
86                }
87            }
88            StreamData::Original(ref file_range, id) => {
89                resolve.get_data_or_decode(id, file_range.clone(), &self.info.filters)
90            }
91        }
92    }
93
94    pub fn len(&self) -> usize {
95        match self.inner_data {
96            StreamData::Generated(ref data) => data.len(),
97            StreamData::Original(ref range, _) => range.len()
98        }
99    }
100}
101
102impl<I: Object + fmt::Debug> fmt::Debug for Stream<I> {
103    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
104        write!(f, "Stream info={:?}, len={}", self.info.info, self.len())
105    }
106}
107
108impl<I: Object> Object for Stream<I> {
109    /// Convert primitive to Self
110    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
111        let s = PdfStream::from_primitive(p, resolve)?;
112        Stream::from_stream(s, resolve)
113    }
114}
115impl<I: ObjectWrite> Stream<I> {
116    pub fn to_pdf_stream(&self, update: &mut impl Updater) -> Result<PdfStream> {
117        let mut info = match self.info.info.to_primitive(update)? {
118            Primitive::Dictionary(dict) => dict,
119            Primitive::Null => Dictionary::new(),
120            p => bail!("stream info has to be a dictionary (found {:?})", p)
121        };
122        let mut params = None;
123        if self.info.filters.len() > 0 {
124            for f in self.info.filters.iter() {
125                if let Some(para) = match f {
126                    StreamFilter::LZWDecode(ref p) => Some(p.to_primitive(update)?),
127                    StreamFilter::FlateDecode(ref p) => Some(p.to_primitive(update)?),
128                    StreamFilter::DCTDecode(ref p) => Some(p.to_primitive(update)?),
129                    StreamFilter::CCITTFaxDecode(ref p) => Some(p.to_primitive(update)?),
130                    StreamFilter::JBIG2Decode(ref p) => Some(p.to_primitive(update)?),
131                    _ => None
132                } {
133                    assert!(params.is_none());
134                    params = Some(para);
135                }
136            }
137            let mut filters = self.info.filters.iter().map(|filter| match filter {
138                StreamFilter::ASCIIHexDecode => "ASCIIHexDecode",
139                StreamFilter::ASCII85Decode => "ASCII85Decode",
140                StreamFilter::LZWDecode(ref _p) => "LZWDecode",
141                StreamFilter::FlateDecode(ref _p) => "FlateDecode",
142                StreamFilter::JPXDecode => "JPXDecode",
143                StreamFilter::DCTDecode(ref _p) => "DCTDecode",
144                StreamFilter::CCITTFaxDecode(ref _p) => "CCITTFaxDecode",
145                StreamFilter::JBIG2Decode(ref _p) => "JBIG2Decode",
146                StreamFilter::Crypt => "Crypt",
147                StreamFilter::RunLengthDecode => "RunLengthDecode",
148            })
149            .map(|s| Primitive::Name(s.into()));
150            match self.info.filters.len() {
151                0 => {},
152                1 => {
153                    info.insert("Filter", filters.next().unwrap().to_primitive(update)?);
154                }
155                _ => {
156                    info.insert("Filter", Primitive::array::<Primitive, _, _, _>(filters, update)?);
157                }
158            }
159        }
160        if let Some(para) = params {
161            info.insert("DecodeParms", para);
162        }
163
164        let inner = match self.inner_data {
165            StreamData::Generated(ref data) => {
166                info.insert("Length", Primitive::Integer(data.len() as _));
167                StreamInner::Pending { data: data.clone() }
168            },
169            StreamData::Original(ref file_range, id) => {
170                info.insert("Length", Primitive::Integer(file_range.len() as _));
171                StreamInner::InFile { id, file_range: file_range.clone() }
172            }
173        };
174
175        Ok(PdfStream { info, inner })
176    }
177}
178impl<I: ObjectWrite> ObjectWrite for Stream<I> {
179    fn to_primitive(&self, update: &mut impl Updater) -> Result<Primitive> {
180        self.to_pdf_stream(update).map(Primitive::Stream)
181    }
182}
183impl<I: DeepClone> DeepClone for Stream<I> {
184    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
185        let data = match self.inner_data {
186            StreamData::Generated(ref data) => data.clone(),
187            StreamData::Original(ref range, id) => cloner.stream_data(id, range.clone())?
188        };
189        Ok(Stream {
190            info: self.info.deep_clone(cloner)?,
191            inner_data: StreamData::Generated(data),
192        })
193    }
194}
195impl<I: Object> Deref for Stream<I> {
196    type Target = StreamInfo<I>;
197    fn deref(&self) -> &StreamInfo<I> {
198        &self.info
199    }
200}
201
202
/// General stream type. `I` is the additional information to be read from the stream dict.
#[derive(Debug, Clone, DataSize, DeepClone)]
pub struct StreamInfo<I> {
    // General dictionary entries
    /// Filters that the `data` is currently encoded with (corresponds to both `/Filter` and
    /// `/DecodeParms` in the PDF specs), constructed in `from_primitive()`.
    pub filters: Vec<StreamFilter>,

    /// External file containing the stream contents, if any (read from the `F` entry).
    pub file: Option<FileSpec>,
    /// Filters to apply to external file specified in `file` (built from the `FFilter` and
    /// `FDecodeParms` entries in `from_primitive()`).
    pub file_filters: Vec<StreamFilter>,

    // TODO:
    /*
    /// Filters to apply to external file specified in `file`.
    #[pdf(key="FFilter")]
    file_filters: Vec<StreamFilter>,
    #[pdf(key="FDecodeParms")]
    file_decode_parms: Vec<DecodeParms>,
    /// Number of bytes in the decoded stream
    #[pdf(key="DL")]
    dl: Option<usize>,
    */
    // Specialized dictionary entries
    /// Typed view of the dictionary entries that remain after the general ones were removed.
    pub info: I,
}
230
231impl<I> Deref for StreamInfo<I> {
232    type Target = I;
233    fn deref(&self) -> &I {
234        &self.info
235    }
236}
237
238impl<I: Default> Default for StreamInfo<I> {
239    fn default() -> StreamInfo<I> {
240        StreamInfo {
241            filters: Vec::new(),
242            file: None,
243            file_filters: Vec::new(),
244            info: I::default(),
245        }
246    }
247}
248impl<T> StreamInfo<T> {
249/*
250    /// If the stream is not encoded, this is a no-op. `decode()` should be called whenever it's uncertain
251    /// whether the stream is encoded.
252    pub fn encode(&mut self, _filter: StreamFilter) {
253        // TODO this should add the filter to `self.filters` and encode the data with the given
254        // filter
255        unimplemented!();
256    }*/
257    pub fn get_filters(&self) -> &[StreamFilter] {
258        &self.filters
259    }
260}
261impl<T: Object> Object for StreamInfo<T> {
262    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
263        let mut dict = Dictionary::from_primitive(p, resolve)?;
264
265        let _length = usize::from_primitive(
266            dict.remove("Length").ok_or(PdfError::MissingEntry{ typ: "StreamInfo", field: "Length".into() })?,
267            resolve)?;
268
269        let filters = Vec::<Name>::from_primitive(
270            dict.remove("Filter").unwrap_or(Primitive::Null),
271            resolve)?;
272
273        let decode_params = Vec::<Option<Dictionary>>::from_primitive(
274            dict.remove("DecodeParms").unwrap_or(Primitive::Null),
275            resolve)?;
276
277        let file = Option::<FileSpec>::from_primitive(
278            dict.remove("F").unwrap_or(Primitive::Null),
279            resolve)?;
280
281        let file_filters = Vec::<Name>::from_primitive(
282            dict.remove("FFilter").unwrap_or(Primitive::Null),
283            resolve)?;
284
285        let file_decode_params = Vec::<Dictionary>::from_primitive(
286            dict.remove("FDecodeParms").unwrap_or(Primitive::Null),
287            resolve)?;
288
289
290        let mut new_filters = Vec::new();
291        let mut new_file_filters = Vec::new();
292
293        for (i, filter) in filters.iter().enumerate() {
294            let params = match decode_params.get(i) {
295                Some(Some(params)) => params.clone(),
296                _ => Dictionary::default(),
297            };
298            new_filters.push(StreamFilter::from_kind_and_params(filter, params, resolve)?);
299        }
300        for (i, filter) in file_filters.iter().enumerate() {
301            let params = match file_decode_params.get(i) {
302                Some(params) => params.clone(),
303                None => Dictionary::default(),
304            };
305            new_file_filters.push(StreamFilter::from_kind_and_params(filter, params, resolve)?);
306        }
307
308        Ok(StreamInfo {
309            // General
310            filters: new_filters,
311            file,
312            file_filters: new_file_filters,
313            // Special
314            info: T::from_primitive(Primitive::Dictionary (dict), resolve)?,
315        })
316    }
317}
318
/// Dictionary entries specific to an object stream (`Type` = `ObjStm`).
#[derive(Object, Default, Debug, DataSize)]
#[pdf(Type = "ObjStm")]
pub struct ObjStmInfo {
    #[pdf(key = "N")]
    /// Number of compressed objects in the stream.
    pub num_objects: usize,

    #[pdf(key = "First")]
    /// The byte offset, in the decoded stream, of the first compressed object.
    pub first: usize,

    #[pdf(key = "Extends")]
    /// A reference to another object stream which this one extends, if any.
    pub extends: Option<Ref<Stream<()>>>,
}
334
/// An object stream together with the offset table parsed from the start of its decoded data.
#[derive(DataSize)]
pub struct ObjectStream {
    /// Byte offset of each object, relative to `first`. The index is the position of the object
    /// within this stream (the order of the offset table), not its object number.
    offsets:    Vec<usize>,
    /// The object number of this object. Currently always set to 0 by `from_primitive` (TODO).
    _id:         ObjNr,

    /// The underlying stream and its `ObjStm` dictionary entries.
    inner:      Stream<ObjStmInfo>
}
344
345impl Object for ObjectStream {
346    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<ObjectStream> {
347        let stream: Stream<ObjStmInfo> = Stream::from_primitive(p, resolve)?;
348
349        let mut offsets = Vec::new();
350        {
351            debug!("parsing stream");
352            let data = stream.data(resolve)?;
353            let mut lexer = Lexer::new(&data);
354            for _ in 0..(stream.info.num_objects as ObjNr) {
355                let _obj_nr = lexer.next()?.to::<ObjNr>()?;
356                let offset = lexer.next()?.to::<usize>()?;
357                offsets.push(offset);
358            }
359        }
360
361        Ok(ObjectStream {
362            offsets,
363            _id: 0, // TODO
364            inner: stream
365        })
366    }
367}
368
369impl ObjectStream {
370    pub fn get_object_slice(&self, index: usize, resolve: &impl Resolve) -> Result<(Arc<[u8]>, Range<usize>)> {
371        if index >= self.offsets.len() {
372            err!(PdfError::ObjStmOutOfBounds {index, max: self.offsets.len()});
373        }
374        let start = self.inner.info.first + self.offsets[index];
375        let data = self.inner.data(resolve)?;
376        let end = if index == self.offsets.len() - 1 {
377            data.len()
378        } else {
379            self.inner.info.first + self.offsets[index + 1]
380        };
381
382        Ok((data, start..end))
383    }
384    /// Returns the number of contained objects
385    pub fn n_objects(&self) -> usize {
386        self.offsets.len()
387    }
388    pub fn _data(&self, resolve: &impl Resolve) -> Result<Arc<[u8]>> {
389        self.inner.data(resolve)
390    }
391}