// pdf/file.rs
1//! This is kind of the entry-point of the type-safe PDF functionality.
2use std::any::type_name;
3use std::marker::PhantomData;
4use std::collections::HashMap;
5use std::sync::{Arc, Mutex};
6use std::path::Path;
7use std::io::Write;
8
9use crate as pdf;
10use crate::error::*;
11use crate::object::*;
12use crate::primitive::{Dictionary, PdfStream, PdfString, Primitive, StreamInner};
13use crate::backend::{Backend, BackendAppend};
14use crate::any::*;
15use crate::parser::{Lexer, parse_with_lexer};
16use crate::parser::{parse_indirect_object, parse, ParseFlags};
17use crate::xref::{XRef, XRefTable, XRefInfo};
18use crate::crypt::Decoder;
19use crate::crypt::CryptDict;
20use crate::enc::{StreamFilter, decode};
21use std::ops::Range;
22use datasize::DataSize;
23
24#[cfg(feature="cache")]
25pub use globalcache::{ValueSize, sync::SyncCache};
26
/// A typed forward-reference created by [`Updater::promise`]; the actual
/// object must be supplied later via [`Updater::fulfill`].
#[must_use]
pub struct PromisedRef<T> {
    // The raw object reference reserved in the xref table.
    inner:      PlainRef,
    _marker:    PhantomData<T>
}
impl<T> PromisedRef<T> {
    /// The raw, untyped reference.
    pub fn get_inner(&self) -> PlainRef {
        self.inner
    }
    /// The same reference, typed as `Ref<T>`.
    pub fn get_ref(&self) -> Ref<T> {
        Ref::new(self.inner)
    }
}
40
/// Object-cache interface keyed by `PlainRef`.
pub trait Cache<T: Clone> {
    /// Return the cached value for `key`, computing and storing it on a miss.
    fn get_or_compute(&self, key: PlainRef, compute: impl FnOnce() -> T) -> T;
    /// Drop all cached entries.
    fn clear(&self);
    /// Drop the entry for `key`, if present.
    fn remove(&self, key: PlainRef);
}
/// A `Cache` that never stores anything: every lookup recomputes the value.
pub struct NoCache;
impl<T: Clone> Cache<T> for NoCache {
    fn get_or_compute(&self, _key: PlainRef, compute: impl FnOnce() -> T) -> T {
        compute()
    }
    fn clear(&self) {}
    fn remove(&self, _key: PlainRef) {}
}
54
// With the "cache" feature, a shared `SyncCache` (from the `globalcache`
// crate) can serve as object/stream cache.
#[cfg(feature="cache")]
impl<T: Clone + ValueSize + Send + 'static> Cache<T> for Arc<SyncCache<PlainRef, T>> {
    fn get_or_compute(&self, key: PlainRef, compute: impl FnOnce() -> T) -> T {
        self.get(key, |_| compute())
    }
    fn clear(&self) {
        (**self).clear()
    }
    fn remove(&self, key: PlainRef) {
        (**self).remove(&key)
    }
}
67
/// Hooks for observing object accesses; both methods default to no-ops.
pub trait Log {
    /// Called when an object is about to be resolved from the backend.
    fn load_object(&self, _r: PlainRef) {}
    /// Called on every typed `get` lookup.
    fn log_get(&self, _r: PlainRef) {}
}
/// A `Log` implementation that discards all events.
#[derive(Clone)]
pub struct NoLog;
impl Log for NoLog {}
75
/// Backing store for a PDF document: the raw backend plus caches, the
/// cross-reference table, pending changes and decryption state.
#[derive(Clone)]
pub struct Storage<B, OC, SC, L> {
    // objects identical to those in the backend
    cache: OC,
    stream_cache: SC,

    // objects that differ from the backend
    // (type name kept for diagnostics, plus the primitive and generation)
    changes:    HashMap<ObjNr, (&'static str, Primitive, GenNr)>,
    // reserved ids not yet fulfilled (id -> type name, for diagnostics)
    promises:   HashMap<ObjNr, &'static str>,

    // cross-reference table mapping object ids to their locations
    refs:       XRefTable,

    // decryption state; present only for encrypted files
    decoder:    Option<Decoder>,
    options:    ParseOptions,

    backend:    B,

    // Position of the PDF header in the file.
    start_offset: usize,

    log: L
}
98
/// Header line written at the start of newly created files.
const PDF_HEADER: &[u8] = b"%PDF-1.7\n";
100
101impl<OC, SC, L> Storage<Vec<u8>, OC, SC, L>
102where
103    OC: Cache<Result<AnySync, Arc<PdfError>>>,
104    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
105    L: Log,
106{
107    pub fn empty(object_cache: OC, stream_cache: SC, log: L) -> Self {
108        Storage {
109            cache: object_cache,
110            stream_cache,
111            changes: HashMap::new(),
112            promises: HashMap::new(),
113            refs: XRefTable::new(0),
114            decoder: None,
115            options: ParseOptions::strict(),
116            backend: Vec::from(PDF_HEADER),
117            start_offset: 0,
118            log
119        }
120    }
121}
122
impl<B, OC, SC, L> Storage<B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    /// Consume the storage and return the underlying backend.
    pub fn into_inner(self) -> B {
        self.backend
    }
    /// The version string from the file header: the 7 bytes following the
    /// '%' of the header (e.g. "PDF-1.7").
    pub fn version(&self) -> Result<String> {
        Ok(String::from_utf8(self.backend.read(self.start_offset+1..self.start_offset+8)?.to_owned())?)
    }
    /// A fresh resolver borrowing this storage.
    pub fn resolver(&self) -> impl Resolve + '_ {
        StorageResolver::new(self)
    }
    /// Wrap an existing backend, locating the PDF header. The xref table
    /// starts empty; call `load_storage_and_trailer` to populate it.
    pub fn with_cache(backend: B, options: ParseOptions, object_cache: OC, stream_cache: SC, log: L) -> Result<Self> {
        let start_offset = backend.locate_start_offset()?;

        Ok(Storage {
            start_offset,
            backend,
            refs: XRefTable::new(0),
            cache: object_cache,
            stream_cache,
            changes: HashMap::new(),
            promises: HashMap::new(),
            decoder: None,
            options,
            log
        })
    }
    /// Read `range` from the backend, decrypt it (if the file is
    /// encrypted) and run it through the given filter chain.
    fn decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
        let data = self.backend.read(range)?;

        let mut data = Vec::from(data);
        if let Some(ref decoder) = self.decoder {
            // decryption happens before any stream filters are applied
            data = Vec::from(t!(decoder.decrypt(id, &mut data)));
        }
        for filter in filters {
            data = t!(decode(&data, filter), filter);
        }
        Ok(data.into())
    }

    /// Like `load_storage_and_trailer_password`, with an empty password.
    pub fn load_storage_and_trailer(&mut self) -> Result<Dictionary> {
        self.load_storage_and_trailer_password(b"")
    }

    /// Read the xref table and trailer, and set up decryption when the
    /// trailer has an /Encrypt entry. Returns the trailer dictionary.
    pub fn load_storage_and_trailer_password(&mut self, password: &[u8]) -> Result<Dictionary> {

        let resolver = StorageResolver::new(self);
        let (refs, trailer) = t!(self.backend.read_xref_table_and_trailer(self.start_offset, &resolver));
        self.refs = refs;

        if let Some(crypt) = trailer.get("Encrypt") {
            // the first element of /ID feeds into the decryption key
            let key = trailer
                .get("ID")
                .ok_or(PdfError::MissingEntry {
                    typ: "Trailer",
                    field: "ID".into(),
                })?
                .as_array()?
                .get(0)
                .ok_or(PdfError::MissingEntry {
                    typ: "Trailer",
                    field: "ID[0]".into()
                })?
                .as_string()?
                .as_bytes();

            let resolver = StorageResolver::new(self);
            let dict = CryptDict::from_primitive(crypt.clone(), &resolver)?;

            self.decoder = Some(t!(Decoder::from_password(&dict, key, password)));
            // remember which indirect objects the decoder must treat specially
            if let Primitive::Reference(reference) = crypt {
                self.decoder.as_mut().unwrap().encrypt_indirect_object = Some(*reference);
            }
            if let Some(Primitive::Reference(catalog_ref)) = trailer.get("Root") {
                let resolver = StorageResolver::new(self);
                let catalog = t!(t!(resolver.resolve(*catalog_ref)).resolve(&resolver)?.into_dictionary());
                if let Some(Primitive::Reference(metadata_ref)) = catalog.get("Metadata") {
                    self.decoder.as_mut().unwrap().metadata_indirect_object = Some(*metadata_ref);
                }
            }
        }
        Ok(trailer)
    }
    /// Linearly scan the file for indirect objects and trailer
    /// dictionaries, without consulting the xref table.
    ///
    /// NOTE(review): the two `unwrap`s below panic (rather than returning
    /// an error) when the xref offset cannot be located or read.
    pub fn scan(&self) -> impl Iterator<Item = Result<ScanItem>> + '_ {
        let xref_offset = self.backend.locate_xref_offset().unwrap();
        let slice = self.backend.read(self.start_offset .. xref_offset).unwrap();
        let mut lexer = Lexer::with_offset(slice, 0);

        // consume an old-style xref table up to and including "trailer"
        fn skip_xref(lexer: &mut Lexer) -> Result<()> {
            while lexer.next()? != "trailer" {

            }
            Ok(())
        }

        let resolver = StorageResolver::new(self);
        std::iter::from_fn(move || {
            loop {
                let pos = lexer.get_pos();
                match parse_indirect_object(&mut lexer, &resolver, self.decoder.as_ref(), ParseFlags::all()) {
                    Ok((r, p)) => return Some(Ok(ScanItem::Object(r, p))),
                    Err(e) if e.is_eof() => return None,
                    Err(e) => {
                        // not an indirect object here: rewind and check for
                        // "xref" / "startxref" keywords before giving up
                        lexer.set_pos(pos);
                        if let Ok(s) = lexer.next() {
                            debug!("next: {:?}", String::from_utf8_lossy(s.as_slice()));
                            match &*s {
                                b"xref" => {
                                    if let Err(e) = skip_xref(&mut lexer) {
                                        return Some(Err(e));
                                    }
                                    if let Ok(trailer) = parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::DICT).and_then(|p| p.into_dictionary()) {
                                        return Some(Ok(ScanItem::Trailer(trailer)));
                                    }
                                }
                                b"startxref" if lexer.next().is_ok() => {
                                    // skip the offset token and keep scanning
                                    continue;
                                }
                                _ => {}
                            }
                        }
                        return Some(Err(e));
                    }
                }
            }
        })
    }
    /// Resolve `r` to a primitive. Pending changes take precedence;
    /// otherwise the object is parsed from the location given by the
    /// xref table (raw file position or inside an object stream).
    fn resolve_ref(&self, r: PlainRef, flags: ParseFlags, resolve: &impl Resolve) -> Result<Primitive> {
        match self.changes.get(&r.id) {
            Some((_, p, _)) => Ok((*p).clone()),
            None => match t!(self.refs.get(r.id)) {
                XRef::Raw {pos, ..} => {
                    let mut lexer = Lexer::with_offset(t!(self.backend.read(self.start_offset + pos ..), r), self.start_offset + pos);
                    let p = t!(parse_indirect_object(&mut lexer, resolve, self.decoder.as_ref(), flags), r).1;
                    Ok(p)
                }
                XRef::Stream {stream_id, index} => {
                    if !flags.contains(ParseFlags::STREAM) {
                        return Err(PdfError::PrimitiveNotAllowed { found: ParseFlags::STREAM, allowed: flags });
                    }
                    // use get to cache the object stream
                    let obj_stream = resolve.get::<ObjectStream>(Ref::from_id(stream_id))?;

                    let (data, range) = t!(obj_stream.get_object_slice(index, resolve));
                    let slice = data.get(range.clone()).ok_or_else(|| other!("invalid range {:?}, but only have {} bytes", range, data.len()))?;
                    parse(slice, resolve, flags)
                }
                XRef::Free {..} => err!(PdfError::FreeObject {obj_nr: r.id}),
                // NOTE(review): a promised-but-unfulfilled ref panics here
                XRef::Promised => unimplemented!(),
                XRef::Invalid => err!(PdfError::NullRef {obj_nr: r.id}),
            }
        }
    }
}
282
/// Item produced by [`Storage::scan`]: an indirect object or a trailer
/// dictionary found while walking the file.
pub enum ScanItem {
    Object(PlainRef, Primitive),
    Trailer(Dictionary)
}
287
/// A `Resolve` implementation borrowing a `Storage`. `chain` records the
/// refs currently being resolved, to detect reference cycles.
struct StorageResolver<'a, B, OC, SC, L> {
    storage: &'a Storage<B, OC, SC, L>,
    chain: Mutex<Vec<PlainRef>>,
}
impl<'a, B, OC, SC, L> StorageResolver<'a, B, OC, SC, L> {
    /// Borrow `storage` with an empty resolve chain.
    pub fn new(storage: &'a Storage<B, OC, SC, L>) -> Self {
        StorageResolver {
            storage,
            chain: Mutex::new(vec![])
        }
    }
}
300
/// Minimal scope-exit guard: runs the wrapped closure when dropped.
/// Used to pop the resolve chain on every exit path, including `?`.
struct Defer<F: FnMut()>(F);
impl<F: FnMut()> Drop for Defer<F> {
    fn drop(&mut self) {
        let callback = &mut self.0;
        callback();
    }
}
307
/// Shared implementation of `Resolve::get` for `StorageResolver` and
/// `File`: cycle-checked, cached, typed resolution of a reference.
fn resolve_get<'a, B, OC, SC, L, T>(resolver: &impl Resolve, storage: &'a Storage<B, OC, SC, L>, chain: &Mutex<Vec<PlainRef>>, r: Ref<T>) -> Result<RcRef<T>>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
    T: Object+DataSize
{
    let key = r.get_inner();
    storage.log.log_get(key);

    {
        debug!("get {key:?} as {}", std::any::type_name::<T>());
        // cycle check: bail out if `key` is already being resolved
        let mut chain = chain.lock().unwrap();
        if chain.contains(&key) {
            bail!("Recursive reference");
        }
        chain.push(key);
    }
    // pop `key` from the chain on every exit path (drop guard)
    let _defer = Defer(|| {
        let mut chain = chain.lock().unwrap();
        assert_eq!(chain.pop(), Some(key));
    });

    let res = storage.cache.get_or_compute(key, || {
        match resolver.resolve(key).and_then(|p| T::from_primitive(p, resolver)) {
            Ok(obj) => Ok(AnySync::new(Shared::new(obj))),
            Err(e) => {
                // resolve again purely to log the offending primitive
                let p = resolver.resolve(key);
                warn!("failed to decode {p:?} as {}", std::any::type_name::<T>());
                Err(Arc::new(e))
            }
        }
    });
    match res {
        Ok(any) => {
            match any.downcast() {
                Ok(val) => Ok(RcRef::new(key, val)),
                Err(_) => {
                    // cached under a different type: re-parse as `T`
                    let p = resolver.resolve(key)?;
                    Ok(RcRef::new(key, T::from_primitive(p, resolver)?.into()))
                }
            }
        }
        Err(e) => Err(PdfError::Shared { source: e.clone()}),
    }
}
355
impl<'a, B, OC, SC, L> Resolve for StorageResolver<'a, B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log
{
    /// Resolve a reference to its primitive, logging the access first.
    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, _depth: usize) -> Result<Primitive> {
        let storage = self.storage;
        storage.log.load_object(r);

        storage.resolve_ref(r, flags, self)
    }

    /// Typed, cached lookup with cycle detection (see `resolve_get`).
    fn get<T: Object+DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
        resolve_get(self, &self.storage, &self.chain, r)
    }
    fn options(&self) -> &ParseOptions {
        &self.storage.options
    }
    /// Raw (decrypted but unfiltered) stream data.
    fn stream_data(&self, id: PlainRef, range: Range<usize>) -> Result<Arc<[u8]>> {
        self.storage.decode(id, range, &[])
    }
    /// Decoded stream data, memoized in the stream cache per object id.
    fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
        self.storage.stream_cache.get_or_compute(id, || self.storage.decode(id, range, filters).map_err(Arc::new))
        .map_err(|e| e.into())
    }
}
384
impl<B, OC, SC, L> Updater for Storage<B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    /// Allocate a new object id and record `obj` as a pending change.
    fn create<T: ObjectWrite>(&mut self, obj: T) -> Result<RcRef<T>> {
        let id = self.refs.len() as u64;
        self.refs.push(XRef::Promised);

        let primitive = obj.to_primitive(self)?;
        self.changes.insert(id, (std::any::type_name::<T>(), primitive, 0));
        let rc = Shared::new(obj);
        let r = PlainRef { id, gen: 0 };

        Ok(RcRef::new(r, rc))
    }
    /// Replace — or, with `options.merge`, merge into — the object behind
    /// `old`, and invalidate its cache entry.
    ///
    /// NOTE(review): panics on `XRef::Free` / `XRef::Invalid` entries.
    fn update_with<T: ObjectWrite>(&mut self, old: PlainRef, obj: T, options: UpdateOptions) -> Result<RcRef<T>> {
        use std::collections::hash_map::Entry;

        let r = match self.refs.get(old.id)? {
            XRef::Free { .. } => panic!(),
            XRef::Raw { gen_nr, .. } => PlainRef { id: old.id, gen: gen_nr },
            // objects living in object streams get a fresh id instead
            XRef::Stream { .. } => return self.create(obj),
            XRef::Promised => PlainRef { id: old.id, gen: 0 },
            XRef::Invalid => panic!()
        };
        let primitive = obj.to_primitive(self)?;
        match self.changes.entry(old.id) {
            Entry::Vacant(e) => {
                e.insert((std::any::type_name::<T>(), primitive, r.gen));
            }
            Entry::Occupied(mut e) => {
                if options.merge {
                    // merging is only defined when both values are dictionaries
                    match (e.get_mut(), primitive) {
                        ((_, Primitive::Dictionary(ref mut dict), _), Primitive::Dictionary(new)) => {
                            dict.append(new);
                        }
                        ((old_name, old_prim, _), new_prim) => {
                            bail!(
                                "can't update previous type {} which is {} with data from new type {} which is {}",
                                old_name, old_prim.get_debug_name(), std::any::type_name::<T>(), new_prim.get_debug_name()
                            );
                        }
                    }
                } else {
                    e.insert((std::any::type_name::<T>(), primitive, r.gen));
                }
            }
        }
        let rc = Shared::new(obj);
        self.cache.remove(old);

        Ok(RcRef::new(r, rc))
    }

    /// Reserve an object id for an object to be supplied later.
    fn promise<T: Object>(&mut self) -> PromisedRef<T> {
        let id = self.refs.len() as u64;

        self.refs.push(XRef::Promised);
        self.promises.insert(id, type_name::<T>());

        PromisedRef {
            inner: PlainRef {
                id,
                gen: 0
            },
            _marker:    PhantomData
        }
    }

    /// Supply the object for a previously promised reference.
    fn fulfill<T: ObjectWrite>(&mut self, promise: PromisedRef<T>, obj: T) -> Result<RcRef<T>> {
        let r = self.update(promise.inner, obj)?;
        self.promises.remove(&promise.inner.id);
        Ok(r)
    }
}
463
/// Options for [`Updater::update_with`].
#[derive(Default)]
pub struct UpdateOptions {
    // When true and both old and new values are dictionaries, the new
    // entries are appended into the old dictionary instead of replacing it.
    pub merge: bool
}
468
469impl<OC, SC, L, B> Storage<B, OC, SC, L>
470where
471    B: BackendAppend,
472    OC: Cache<Result<AnySync, Arc<PdfError>>>,
473    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
474    L: Log
475{
476    pub fn rebuild(&mut self, trailer: &mut Trailer) -> Result<Vec<u8>> {
477        if self.promises.len() > 0 {
478            for (id, name) in self.promises.iter() {
479                eprintln!("Promise unfulfilled: id={id} type={name}");
480                return Err(PdfError::Invalid);
481            }
482        }
483
484        // writing the trailer generates another id for the info dictionary
485        trailer.size = (self.refs.len() + 2) as _;
486        let trailer_dict = trailer.to_dict(self)?;
487        let xref_promise = self.promise::<Stream<XRefInfo>>();
488
489        let mut out = Vec::new();
490        out.extend_from_slice(PDF_HEADER);
491        let mut new_refs = HashMap::new();
492        let resolver = StorageResolver::new(self);
493        for (obj_nr, r) in self.refs.entries() {
494            let id = obj_nr as u64;
495            let prim = t!(self.resolve_ref(PlainRef { id, gen: 0 }, ParseFlags::ANY, &resolver), id, r);
496
497            let byte_pos = out.len();
498            writeln!(out, "{id} 0 obj")?;
499            match prim {
500                Primitive::Stream(PdfStream { inner: StreamInner::InFile { file_range, .. }, info }) => {
501                    let data = self.backend.read(file_range)?;
502
503                    let info_len = info.get("Length").unwrap().as_usize().unwrap();
504                    assert_eq!(info_len, data.len());
505                    t!(info.serialize(&mut out));
506                    writeln!(out, "stream")?;
507                    out.write_all(&data)?;
508                    writeln!(out, "\nendstream")?;
509                }
510                prim => {
511                    t!(prim.serialize(&mut out));
512                }
513            }
514
515            writeln!(out, "endobj")?;
516            new_refs.insert(id, XRef::Raw { pos: byte_pos, gen_nr: 0 });
517        }
518        for (&id, (_, prim, _)) in self.changes.iter() {
519            match self.refs.get(id) {
520                Ok(XRef::Raw { .. }) | Ok(XRef::Stream { .. }) => continue,
521                _ => {}
522            }
523            let byte_pos = out.len();
524            writeln!(out, "{id} 0 obj")?;
525            prim.serialize(&mut out)?;
526            if !out.last_byte().map(|b| b.is_ascii_whitespace()).unwrap_or(false) {
527                out.write_all(b"\n")?;
528            }
529            writeln!(out, "endobj")?;
530            new_refs.insert(id, XRef::Raw { pos: byte_pos, gen_nr: 0 });
531        }
532
533        for (id, xref) in new_refs {
534            self.refs.set(id, xref);
535        }
536
537        let xref_pos = out.len();
538        self.refs.set(xref_promise.get_inner().id, XRef::Raw { pos: xref_pos, gen_nr: 0 });
539        // only write up to the xref stream obj id
540
541        let stream = t!(self.refs.write_stream(xref_promise.get_inner().id as usize + 1));
542
543        writeln!(out, "{} {} obj", xref_promise.get_inner().id, 0)?;
544        let mut xref_and_trailer = stream.to_pdf_stream(&mut NoUpdate)?;
545        for (k, v) in trailer_dict.iter() {
546            xref_and_trailer.info.insert(k.clone(), v.clone());
547        }
548
549        xref_and_trailer.serialize(&mut out)?;
550        writeln!(out, "endobj")?;
551
552        let _ = self.fulfill(xref_promise, stream)?;
553
554        write!(out, "\nstartxref\n{}\n%%EOF", xref_pos).unwrap();
555
556        // update trailer which may have change now.
557        *trailer = t!(Trailer::from_dict(trailer_dict, &self.resolver()));
558        self.cache.clear();
559
560        Ok(out)
561    }
562    pub fn save(&mut self, trailer: &mut Trailer) -> Result<()> {
563        if self.promises.len() > 0 {
564            for (id, name) in self.promises.iter() {
565                eprintln!("Promise unfulfilled: id={id} type={name}");
566                return Err(PdfError::Invalid);
567            }
568        }
569
570        // writing the trailer generates another id for the info dictionary
571        trailer.size = (self.refs.len() + 2) as _;
572        let trailer_dict = trailer.to_dict(self)?;
573
574        let xref_promise = self.promise::<Stream<XRefInfo>>();
575
576        let mut changes: Vec<_> = self.changes.iter().collect();
577        changes.sort_unstable_by_key(|&(id, _)| id);
578
579        for &(&id, &(_name, ref primitive, gen)) in changes.iter() {
580            let pos = self.backend.len();
581            self.refs.set(id, XRef::Raw { pos: pos as _, gen_nr: gen });
582            writeln!(self.backend, "{id} {gen} obj")?;
583            primitive.serialize(&mut self.backend)?;
584            if !self.backend.last_byte().map(|b| b.is_ascii_whitespace()).unwrap_or(false) {
585                self.backend.write_all(b"\n")?;
586            }
587            writeln!(self.backend, "endobj")?;
588        }
589
590        let xref_pos = self.backend.len();
591        self.refs.set(xref_promise.get_inner().id, XRef::Raw { pos: xref_pos, gen_nr: 0 });
592        // only write up to the xref stream obj id
593        let stream = self.refs.write_stream(xref_promise.get_inner().id as usize + 1)?;
594
595        writeln!(self.backend, "{} {} obj", xref_promise.get_inner().id, 0)?;
596        let mut xref_and_trailer = stream.to_pdf_stream(&mut NoUpdate)?;
597        for (k, v) in trailer_dict.iter() {
598            xref_and_trailer.info.insert(k.clone(), v.clone());
599        }
600
601        xref_and_trailer.serialize(&mut self.backend)?;
602        writeln!(self.backend, "endobj")?;
603
604        let _ = self.fulfill(xref_promise, stream)?;
605
606        write!(self.backend, "\nstartxref\n{}\n%%EOF", xref_pos).unwrap();
607
608        // update trailer which may have change now.
609        self.cache.clear();
610        *trailer = Trailer::from_dict(trailer_dict, &self.resolver())?;
611
612        Ok(())
613    }
614}
615
/// Default object cache used by [`CachedFile`].
#[cfg(feature="cache")]
pub type ObjectCache = Arc<SyncCache<PlainRef, Result<AnySync, Arc<PdfError>>>>;
/// Default stream-data cache used by [`CachedFile`].
#[cfg(feature="cache")]
pub type StreamCache = Arc<SyncCache<PlainRef, Result<Arc<[u8]>, Arc<PdfError>>>>;
/// A `File` with the default caches and no logging.
#[cfg(feature="cache")]
pub type CachedFile<B> = File<B, ObjectCache, StreamCache, NoLog>;
622
/// A loaded PDF document: the storage plus the parsed trailer.
pub struct File<B, OC, SC, L> {
    pub storage:    Storage<B, OC, SC, L>,
    pub trailer:    Trailer,
    // refs currently being resolved, for cycle detection
    resolve_chain:  Mutex<Vec<PlainRef>>,
}
impl<B: Clone, OC: Clone, SC: Clone, L: Clone> Clone for File<B, OC, SC, L> {
    // Manual impl because `Mutex` is not `Clone`; the clone starts with
    // an empty resolve chain.
    fn clone(&self) -> Self {
        File {
            storage: self.storage.clone(),
            trailer: self.trailer.clone(),
            resolve_chain: Default::default()
        }
    }
}
637
/// `Updater` for `File` forwards every operation to the underlying storage.
impl<B, OC, SC, L> Updater for File<B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    fn create<T: ObjectWrite>(&mut self, obj: T) -> Result<RcRef<T>> {
        self.storage.create(obj)
    }
    fn update_with<T: ObjectWrite>(&mut self, old: PlainRef, obj: T, options: UpdateOptions) -> Result<RcRef<T>> {
        self.storage.update_with(old, obj, options)
    }
    fn promise<T: Object>(&mut self) -> PromisedRef<T> {
        self.storage.promise()
    }
    fn fulfill<T: ObjectWrite>(&mut self, promise: PromisedRef<T>, obj: T) -> Result<RcRef<T>> {
        self.storage.fulfill(promise, obj)
    }
}
658impl<B, OC, SC, L> Resolve for File<B, OC, SC, L>
659where
660    B: Backend,
661    OC: Cache<Result<AnySync, Arc<PdfError>>>,
662    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
663    L: Log,
664{
665    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, _depth: usize) -> Result<Primitive> {
666        let ref storage = self.storage;
667        storage.log.load_object(r);
668
669        storage.resolve_ref(r, flags, self)
670    }
671
672    fn get<T: Object+DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
673        resolve_get(self, &self.storage, &self.resolve_chain, r)
674    }
675    fn options(&self) -> &ParseOptions {
676        &self.storage.options
677    }
678    fn stream_data(&self, id: PlainRef, range: Range<usize>) -> Result<Arc<[u8]>> {
679        self.storage.decode(id, range, &[])
680    }
681    fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
682        self.storage.stream_cache.get_or_compute(id, || self.storage.decode(id, range, filters).map_err(Arc::new))
683        .map_err(|e| e.into())
684    }
685}
686impl<OC, SC, L> File<Vec<u8>, OC, SC, L>
687where
688    OC: Cache<Result<AnySync, Arc<PdfError>>>,
689    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
690    L: Log,
691{
692    pub fn save_to(&mut self, path: impl AsRef<Path>) -> Result<()> {
693        self.storage.save(&mut self.trailer)?;
694        std::fs::write(path, &self.storage.backend)?;
695        Ok(())
696    }
697    pub fn rebuild(&mut self) -> Result<Vec<u8>> {
698        let data = self.storage.rebuild(&mut self.trailer)?;
699        Ok(data)
700    }
701}
702
703
/// Builder for opening/loading PDF files with configurable caches,
/// logger, password and parse options.
pub struct FileOptions<'a, OC, SC, L> {
    oc: OC,
    sc: SC,
    log: L,
    password: &'a [u8],
    parse_options: ParseOptions,
}
711impl FileOptions<'static, NoCache, NoCache, NoLog> {
712    pub fn uncached() -> Self {
713        FileOptions {
714            oc: NoCache,
715            sc: NoCache,
716            password: b"",
717            parse_options: ParseOptions::strict(),
718            log: NoLog,
719        }
720    }
721}
722
#[cfg(feature="cache")]
impl FileOptions<'static, ObjectCache, StreamCache, NoLog> {
    /// Options with shared `SyncCache`s for objects and stream data,
    /// no logging, empty password and strict parsing.
    pub fn cached() -> Self {
        FileOptions {
            oc: SyncCache::new(),
            sc: SyncCache::new(),
            password: b"",
            parse_options: ParseOptions::strict(),
            log: NoLog
        }
    }
}
735impl<'a, OC, SC, L> FileOptions<'a, OC, SC, L>
736where
737    OC: Cache<Result<AnySync, Arc<PdfError>>>,
738    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
739    L: Log,
740{
741    pub fn password(self, password: &'a [u8]) -> FileOptions<'a, OC, SC, L> {
742        FileOptions {
743            password,
744            .. self
745        }
746    }
747    pub fn cache<O, S>(self, oc: O, sc: S) -> FileOptions<'a, O, S, L> {
748        let FileOptions { oc: _, sc: _, password, parse_options, log } = self;
749        FileOptions {
750            oc,
751            sc,
752            password,
753            parse_options,
754            log,
755        }
756    }
757    pub fn log<Log>(self, log: Log) -> FileOptions<'a, OC, SC, Log> {
758        let FileOptions { oc, sc, password, parse_options, .. } = self;
759        FileOptions {
760            oc,
761            sc,
762            password,
763            parse_options,
764            log,
765        }
766    }
767    pub fn parse_options(self, parse_options: ParseOptions) -> Self {
768        FileOptions { parse_options, .. self }
769    }
770
771    /// open a file
772    pub fn open(self, path: impl AsRef<Path>) -> Result<File<Vec<u8>, OC, SC, L>> {
773        let data = std::fs::read(path)?;
774        self.load(data)
775    }
776    pub fn storage(self) -> Storage<Vec<u8>, OC, SC, L> {
777        let FileOptions { oc, sc, log, .. } = self;
778        Storage::empty(oc, sc, log)
779    }
780
781    /// load data from the given backend
782    pub fn load<B: Backend>(self, backend: B) -> Result<File<B, OC, SC, L>> {
783        let FileOptions { oc, sc, password, parse_options, log } = self;
784        File::load_data(backend, password, parse_options, oc, sc, log)
785    }
786}
787
788
impl<B, OC, SC, L> File<B, OC, SC, L>
where
    B: Backend,
    OC: Cache<Result<AnySync, Arc<PdfError>>>,
    SC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
    L: Log,
{
    /// Load a document from `backend`: read the xref table and trailer
    /// (setting up decryption with `password` if needed), then parse the
    /// trailer.
    pub fn load_data(backend: B, password: &[u8], options: ParseOptions, object_cache: OC, stream_cache: SC, log: L) -> Result<Self> {
        let mut storage = Storage::with_cache(backend, options, object_cache, stream_cache, log)?;
        let trailer = storage.load_storage_and_trailer_password(password)?;

        let resolver = StorageResolver::new(&storage);
        let trailer = t!(Trailer::from_primitive(
            Primitive::Dictionary(trailer),
            &resolver,
        ));
        Ok(File { storage, trailer, resolve_chain: Default::default() })
    }
    /// Assemble a `File` from already-loaded parts.
    pub fn new(storage: Storage<B, OC, SC, L>, trailer: Trailer) -> Self {
        File { storage, trailer, resolve_chain: Default::default() }
    }
    /// A fresh resolver borrowing this file's storage.
    pub fn resolver(&self) -> impl Resolve + '_ {
        StorageResolver::new(&self.storage)
    }

    /// The document catalog (trailer /Root).
    pub fn get_root(&self) -> &Catalog {
        &self.trailer.root
    }

    /// Iterate over all pages in document order.
    pub fn pages(&self) -> impl Iterator<Item=Result<PageRc>> + '_ {
        (0 .. self.num_pages()).map(move |n| self.get_page(n))
    }
    /// Number of pages, as recorded in the page tree root.
    pub fn num_pages(&self) -> u32 {
        self.trailer.root.pages.count
    }

    /// Look up page `n` (zero-based) via the page tree.
    pub fn get_page(&self, n: u32) -> Result<PageRc> {
        let resolver = StorageResolver::new(&self.storage);
        self.trailer.root.pages.page(&resolver, n)
    }

    /// Replace the document catalog with a newly created object.
    pub fn update_catalog(&mut self, catalog: Catalog) -> Result<()> {
        self.trailer.root = self.create(catalog)?;
        Ok(())
    }

    /// Change the parse options used for subsequent resolutions.
    pub fn set_options(&mut self, options: ParseOptions) {
        self.storage.options = options;
    }

    /// Linear scan of the file, see [`Storage::scan`].
    pub fn scan(&self) -> impl Iterator<Item = Result<ScanItem>> + '_ {
        self.storage.scan()
    }

    /// Access the logger.
    pub fn log(&self) -> &L {
        &self.storage.log
    }

    /// the version string in the file header.
    /// if the version field in the Catalog is set, this should be used instead.
    pub fn version(&self) -> Result<String> {
        self.storage.version()
    }
}
853
/// The PDF trailer dictionary (ISO 32000-1, §7.5.5).
#[derive(Clone, Object, ObjectWrite, DataSize)]
pub struct Trailer {
    /// Total number of entries in the cross-reference table.
    #[pdf(key = "Size")]
    pub size:               i32,

    /// Byte offset of the previous xref section, for incrementally
    /// updated files.
    #[pdf(key = "Prev")]
    pub prev_trailer_pos:   Option<i32>,

    /// The document catalog.
    #[pdf(key = "Root")]
    pub root:               RcRef<Catalog>,

    /// Encryption dictionary, present for encrypted files.
    #[pdf(key = "Encrypt")]
    pub encrypt_dict:       Option<RcRef<CryptDict>>,

    /// Document information dictionary.
    #[pdf(key = "Info", indirect)]
    pub info_dict:          Option<InfoDict>,

    /// File identifiers; the first element also feeds into the
    /// decryption key (see `load_storage_and_trailer_password`).
    #[pdf(key = "ID")]
    pub id:                 Vec<PdfString>,
}
874
875/*
876pub struct XRefStream {
877    pub data: Vec<u8>,
878    pub info: XRefInfo,
879}
880
881impl Object for XRefStream {
882    fn serialize<W: io::Write>(&self, _out: &mut W) -> io::Result<()> {
883        unimplemented!();
884    }
885    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result<Self> {
886        let stream = p.to_stream(resolve)?;
887        let info = XRefInfo::from_primitive(Primitive::Dictionary (stream.info), resolve)?;
888        let data = stream.data.clone();
889        Ok(XRefStream {
890            data: data,
891            info: info,
892        })
893    }
894}
895*/