pdf/
build.rs

1use std::collections::HashMap;
2use std::collections::HashSet;
3use std::ops::Range;
4use std::sync::Arc;
5
6use datasize::DataSize;
7
8use crate::PdfError;
9use crate::any::AnySync;
10use crate::enc::StreamFilter;
11use crate::file::Cache;
12use crate::file::FileOptions;
13use crate::file::Log;
14use crate::file::Storage;
15use crate::file::Trailer;
16use crate::object::*;
17use crate::content::*;
18use crate::error::Result;
19use crate::parser::ParseFlags;
20use crate::primitive::Dictionary;
21use crate::primitive::Primitive;
22
23#[derive(Default)]
24pub struct PageBuilder {
25    pub ops: Vec<Op>,
26    pub media_box: Option<Rectangle>,
27    pub crop_box: Option<Rectangle>,
28    pub trim_box: Option<Rectangle>,
29    pub resources: Resources,
30    pub rotate: i32,
31    pub metadata: Option<Primitive>,
32    pub lgi: Option<Primitive>,
33    pub vp: Option<Primitive>,
34    pub other: Dictionary,
35}
36impl PageBuilder {
37    pub fn from_content(content: Content, resolve: &impl Resolve) -> Result<PageBuilder> {
38        Ok(PageBuilder {
39            ops: content.operations(resolve)?,
40            .. PageBuilder::default()
41        })
42    }
43    pub fn from_page(page: &Page, resolve: &impl Resolve) -> Result<PageBuilder> {
44        Ok(PageBuilder {
45            ops: page.contents.as_ref().map(|c| c.operations(resolve)).transpose()?.unwrap_or_default(),
46            media_box: Some(page.media_box()?),
47            crop_box: Some(page.crop_box()?),
48            trim_box: page.trim_box,
49            resources: (**page.resources()?.data()).clone(),
50            rotate: page.rotate,
51            metadata: page.metadata.clone(),
52            lgi: page.lgi.clone(),
53            vp: page.vp.clone(),
54            other: page.other.clone(),
55        })
56    }
57    pub fn clone_page(page: &Page, cloner: &mut impl Cloner) -> Result<PageBuilder> {
58        let old_resources = &**page.resources()?.data();
59
60        let mut resources = Resources::default();
61        let ops = page.contents.as_ref()
62            .map(|content| content.operations(cloner)).transpose()?
63            .map(|ops| {
64                ops.into_iter().map(|op| -> Result<Op, PdfError> {
65                    deep_clone_op(&op, cloner, old_resources, &mut resources)
66                }).collect()
67            })
68            .transpose()?
69            .unwrap_or_default();
70
71        Ok(PageBuilder {
72            ops,
73            media_box: Some(page.media_box()?),
74            crop_box: Some(page.crop_box()?),
75            trim_box: page.trim_box,
76            resources,
77            rotate: page.rotate,
78            metadata: page.metadata.deep_clone(cloner)?,
79            lgi: page.lgi.deep_clone(cloner)?,
80            vp: page.vp.deep_clone(cloner)?,
81            other: page.other.deep_clone(cloner)?,
82        })
83    }
84    pub fn size(&mut self, width: f32, height: f32) {
85        self.media_box = Some(Rectangle {
86            top: 0.,
87            left: 0.,
88            bottom: height,
89            right: width,
90        });
91    }
92}
93
94pub struct CatalogBuilder {
95    pages: Vec<PageBuilder>
96}
97impl CatalogBuilder {
98    pub fn from_pages(pages: Vec<PageBuilder>) -> CatalogBuilder {
99        CatalogBuilder {
100            pages
101        }
102    }
103    pub fn build(self, update: &mut impl Updater) -> Result<Catalog> {
104        let kids_promise: Vec<_> = self.pages.iter()
105            .map(|_page| update.promise::<PagesNode>())
106            .collect();
107        let kids: Vec<_> = kids_promise.iter()
108            .map(|p| Ref::new(p.get_inner()))
109            .collect();
110
111        let tree = PagesRc::create(PageTree {
112            parent: None,
113            count: kids.len() as _,
114            kids,
115            resources: None,
116            media_box: None,
117            crop_box: None
118        }, update)?;
119
120        for (page, promise) in self.pages.into_iter().zip(kids_promise) {
121            let content = Content::from_ops(page.ops);
122            let resources = update.create(page.resources)?.into();
123            let page = Page {
124                parent: tree.clone(),
125                contents: Some(content),
126                media_box: page.media_box,
127                crop_box: page.crop_box,
128                trim_box: page.trim_box,
129                resources: Some(resources),
130                rotate: page.rotate,
131                metadata: page.metadata,
132                lgi: page.lgi,
133                vp: page.vp,
134                other: page.other,
135                annotations: Default::default(),
136            };
137            update.fulfill(promise, PagesNode::Leaf(page))?;
138        }
139
140        Ok(Catalog {
141            version: Some("1.7".into()),
142            pages: tree,
143            names: None,
144            dests: None,
145            metadata: None,
146            outlines: None,
147            struct_tree_root: None,
148            forms: None,
149            page_labels: None,
150        })
151    }
152}
153
154pub struct PdfBuilder<SC, OC, L> {
155    pub storage: Storage<Vec<u8>, SC, OC, L>,
156    pub info: Option<InfoDict>,
157    pub id: Option<[String; 2]>,
158
159}
160impl<SC, OC, L> PdfBuilder<SC, OC, L>
161where
162    SC: Cache<Result<AnySync, Arc<PdfError>>>,
163    OC: Cache<Result<Arc<[u8]>, Arc<PdfError>>>,
164    L: Log,
165{
166    pub fn new(fileoptions: FileOptions<'_, SC, OC, L>) -> Self {
167        let storage = fileoptions.storage();
168        PdfBuilder {
169            storage,
170            info: None,
171            id: None
172        }
173    }
174    pub fn info(mut self, info: InfoDict) -> Self {
175        self.info = Some(info);
176        self
177    }
178    pub fn id(mut self, a: String, b: String) -> Self {
179        self.id = Some([a, b]);
180        self
181    }
182    pub fn build(mut self, catalog: CatalogBuilder) -> Result<Vec<u8>> {
183        let catalog = catalog.build(&mut self.storage)?;
184        
185        let mut trailer = Trailer {
186            root: self.storage.create(catalog)?,
187            encrypt_dict: None,
188            size: 0,
189            id: vec!["foo".into(), "bar".into()],
190            info_dict: self.info,
191            prev_trailer_pos: None,
192        };
193        self.storage.save(&mut trailer)?;
194        Ok(self.storage.into_inner())
195    }
196}
197pub struct Importer<'a, R, U> {
198    resolver: R,
199    map: HashMap<PlainRef, PlainRef>,
200    updater: &'a mut U,
201    rcrefs: HashMap<PlainRef, AnySync>,
202    // ptr of old -> (old, new)
203    shared: HashMap<usize, (AnySync, AnySync)>,
204}
205
206pub struct ImporterMap<R> {
207    resolver: R,
208    map: HashMap<PlainRef, PlainRef>,
209}
210
211impl<'a, R, U> Importer<'a, R, U> {
212    pub fn new(resolver: R, updater: &'a mut U) -> Self {
213        Importer {
214            resolver,
215            updater,
216            map: Default::default(),
217            rcrefs: Default::default(),
218            shared: Default::default(),
219        }
220    }
221}
222impl<'a, R: Resolve, U> Importer<'a, R, U> {
223    pub fn finish(self) -> ImporterMap<R> {
224        ImporterMap { resolver: self.resolver, map: self.map }
225    }
226}
227impl<R: Resolve> ImporterMap<R> {
228    fn compare_dict(&self, a_dict: &Dictionary, b_dict: &Dictionary, new_resolve: &impl Resolve) -> Result<bool> {
229        let mut same = true;
230        let mut b_unvisited: HashSet<_> = b_dict.keys().collect();
231        for (a_key, a_val) in a_dict.iter() {
232            if let Some(b_val) = b_dict.get(a_key) {
233                if !self.compare_prim(a_val, b_val, new_resolve)? {
234                    println!("value for key {a_key} mismatch.");
235                    same = false;
236                }
237                b_unvisited.remove(a_key);
238            } else {
239                println!("missing key {a_key} in b.");
240                same = false;
241            }
242        }
243        for b_key in b_unvisited.iter() {
244            println!("missing key {b_key} in a.");
245        }
246        Ok(same && !b_unvisited.is_empty())
247    }
248    fn compare_prim(&self, a: &Primitive, b: &Primitive, new_resolve: &impl Resolve) -> Result<bool> {
249        match (a, b) {
250            (Primitive::Array(a_parts), Primitive::Array(b_parts)) => {
251                if a_parts.len() != b_parts.len() {
252                    dbg!(a_parts, b_parts);
253                    println!("different length {} vs. {}", a_parts.len(), b_parts.len());
254                    println!("a = {a_parts:?}");
255                    println!("b = {b_parts:?}");
256                    return Ok(false);
257                }
258                for (a, b) in a_parts.iter().zip(b_parts.iter()) {
259                    if !self.compare_prim(a, b, new_resolve)? {
260                        return Ok(false);
261                    }
262                }
263                Ok(true)
264            }
265            (Primitive::Dictionary(a_dict), Primitive::Dictionary(b_dict)) => {
266                self.compare_dict(a_dict, b_dict, new_resolve)
267            }
268            (Primitive::Reference(r1), Primitive::Reference(r2)) => {
269                match self.map.get(&r1) {
270                    Some(r) if r == r2 => Ok(true),
271                    _ => Ok(false)
272                }
273            }
274            (Primitive::Stream(a_s), Primitive::Stream(b_s)) => {
275                if !self.compare_dict(&a_s.info, &b_s.info, new_resolve)? {
276                    println!("stream dicts differ");
277                    return Ok(false)
278                }
279                let a_data = a_s.raw_data(&self.resolver)?;
280                let b_data = b_s.raw_data(new_resolve)?;
281                if a_data != b_data {
282                    println!("data differs.");
283                    return Ok(false)
284                }
285                Ok(true)
286            }
287            (Primitive::Integer(a), Primitive::Number(b)) => Ok(*a as f32 == *b),
288            (Primitive::Number(a), Primitive::Integer(b)) => Ok(*a == *b as f32),
289            (Primitive::Reference(a_ref), b) => {
290                let a = self.resolver.resolve(*a_ref)?;
291                self.compare_prim(&a, b, new_resolve)
292            }
293            (a, Primitive::Reference(b_ref)) => {
294                let b = new_resolve.resolve(*b_ref)?;
295                self.compare_prim(a, &b, new_resolve)
296            }
297            (ref a, ref b) => {
298                if a == b {
299                    Ok(true)
300                } else {
301                    println!("{a:?} != {b:?}");
302                    Ok(false)
303                }
304            }
305        }
306    }
307    pub fn verify(&self, new_resolve: &impl Resolve) -> Result<bool> {
308        let mut same = true;
309        for (&old_ref, &new_ref) in self.map.iter() {
310            let old = self.resolver.resolve(old_ref)?;
311            let new = new_resolve.resolve(new_ref)?;
312
313            if !self.compare_prim(&old, &new, new_resolve)? {
314                same = false;
315            }
316        }
317        Ok(same)
318    }
319} 
320
321impl<'a, R: Resolve, U> Resolve for Importer<'a, R, U> {
322    fn get<T: Object+datasize::DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
323        self.resolver.get(r)
324    }
325    fn get_data_or_decode(&self, id: PlainRef, range: Range<usize>, filters: &[StreamFilter]) -> Result<Arc<[u8]>> {
326        self.resolver.get_data_or_decode(id, range, filters)
327    }
328    fn options(&self) -> &ParseOptions {
329        self.resolver.options()
330    }
331    fn resolve(&self, r: PlainRef) -> Result<Primitive> {
332        self.resolver.resolve(r)
333    }
334    fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result<Primitive> {
335        self.resolver.resolve_flags(r, flags, depth)
336    }
337    fn stream_data(&self, id: PlainRef, range: Range<usize>) -> Result<Arc<[u8]>> {
338        self.resolver.stream_data(id, range)
339    }
340}
341impl<'a, R, U: Updater> Updater for Importer<'a, R, U> {
342    fn create<T: ObjectWrite>(&mut self, obj: T) -> Result<RcRef<T>> {
343        self.updater.create(obj)
344    }
345    fn fulfill<T: ObjectWrite>(&mut self, promise: PromisedRef<T>, obj: T) -> Result<RcRef<T>> {
346        self.updater.fulfill(promise, obj)
347    }
348    fn promise<T: Object>(&mut self) -> PromisedRef<T> {
349        self.updater.promise()
350    }
351    fn update<T: ObjectWrite>(&mut self, old: PlainRef, obj: T) -> Result<RcRef<T>> {
352        self.updater.update(old, obj)
353    }
354}
355impl<'a, R: Resolve, U: Updater> Cloner for Importer<'a, R, U> {
356    fn clone_ref<T: DeepClone + Object + DataSize + ObjectWrite>(&mut self, old: Ref<T>) -> Result<Ref<T>> {
357        if let Some(&new_ref) = self.map.get(&old.get_inner()) {
358            return Ok(Ref::new(new_ref));
359        }
360        let obj = self.resolver.get(old)?;
361        let clone = obj.deep_clone(self)?;
362
363        let r = self.updater.create(clone)?;
364        self.map.insert(old.get_inner(), r.get_ref().get_inner());
365
366        Ok(r.get_ref())
367    }
368    fn clone_plainref(&mut self, old: PlainRef) -> Result<PlainRef> {
369        if let Some(&new_ref) = self.map.get(&old) {
370            return Ok(new_ref);
371        }
372        let obj = self.resolver.resolve(old)?;
373        let clone = obj.deep_clone(self)?;
374
375        let new = self.updater.create(clone)?
376            .get_ref().get_inner();
377
378        self.map.insert(old, new);
379
380        Ok(new)
381    }
382    fn clone_rcref<T: DeepClone + ObjectWrite + DataSize>(&mut self, old: &RcRef<T>) -> Result<RcRef<T>> {
383        let old_ref = old.get_ref().get_inner();
384        if let Some(&new_ref) = self.map.get(&old_ref) {
385            let arc = self.rcrefs.get(&new_ref).unwrap().clone().downcast()?;
386            return Ok(RcRef::new(new_ref, arc));
387        }
388
389        let new = old.data().deep_clone(self)?;
390        let new = self.updater.create::<T>(new)?;
391        self.rcrefs.insert(new.get_ref().get_inner(), AnySync::new(new.data().clone()));
392        self.map.insert(old_ref, new.get_ref().get_inner());
393
394        Ok(new)
395    }
396    fn clone_shared<T: DeepClone>(&mut self, old: &Shared<T>) -> Result<Shared<T>> {
397        let key = &**old as *const T as usize;
398        if let Some((old, new)) = self.shared.get(&key) {
399            return new.clone().downcast();
400        }
401        let new = Shared::new(old.as_ref().deep_clone(self)?);
402        self.shared.insert(key, (AnySync::new_without_size(old.clone()), AnySync::new_without_size(new.clone())));
403        Ok(new)
404    }
405}