Skip to main content

folio_doc/
pdfdoc.rs

1//! PdfDoc — high-level PDF document.
2
3use folio_core::{FolioError, Rect, Result};
4use folio_cos::{CosDoc, ObjectId, PdfObject};
5use indexmap::IndexMap;
6
7use crate::info::DocInfo;
8use crate::page::Page;
9
/// A high-level PDF document.
///
/// This wraps `CosDoc` and provides PDF-specific operations like
/// page management, metadata access, and save.
pub struct PdfDoc {
    /// Underlying low-level document; every method on `PdfDoc` reads or
    /// writes objects through it.
    cos: CosDoc,
}
17
18impl PdfDoc {
19    /// Create a new empty PDF document.
20    pub fn new() -> Result<Self> {
21        let mut cos = CosDoc::new();
22
23        // Create Pages dict
24        let mut pages_dict = IndexMap::new();
25        pages_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Pages".to_vec()));
26        pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(vec![]));
27        pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(0));
28        let pages_id = cos.create_indirect(PdfObject::Dict(pages_dict));
29
30        // Create Catalog
31        let mut catalog_dict = IndexMap::new();
32        catalog_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Catalog".to_vec()));
33        catalog_dict.insert(b"Pages".to_vec(), PdfObject::Reference(pages_id));
34        let catalog_id = cos.create_indirect(PdfObject::Dict(catalog_dict));
35
36        // Set trailer Root
37        cos.trailer_mut()
38            .insert(b"Root".to_vec(), PdfObject::Reference(catalog_id));
39
40        Ok(Self { cos })
41    }
42
43    /// Open a PDF document from a file path.
44    pub fn open(path: &str) -> Result<Self> {
45        let cos = CosDoc::open_file(path)?;
46        Ok(Self { cos })
47    }
48
49    /// Open a PDF document from bytes.
50    pub fn open_from_bytes(data: Vec<u8>) -> Result<Self> {
51        let cos = CosDoc::open(data)?;
52        Ok(Self { cos })
53    }
54
    /// Borrow the underlying `CosDoc` for read-only, low-level access to
    /// the objects this document is built on.
    pub fn cos(&self) -> &CosDoc {
        &self.cos
    }
59
    /// Borrow the underlying `CosDoc` mutably.
    ///
    /// Changes made through this handle bypass the page-tree bookkeeping
    /// done by methods such as `create_page`.
    pub fn cos_mut(&mut self) -> &mut CosDoc {
        &mut self.cos
    }
64
65    /// Get the catalog dictionary's object reference.
66    fn catalog_ref(&self) -> Result<ObjectId> {
67        self.cos
68            .trailer()
69            .get(b"Root".as_slice())
70            .and_then(|o| o.as_reference())
71            .ok_or_else(|| FolioError::InvalidObject("Missing /Root in trailer".into()))
72    }
73
74    /// Get the Pages dictionary reference from the catalog.
75    fn pages_ref(&mut self) -> Result<ObjectId> {
76        let catalog_ref = self.catalog_ref()?;
77        let catalog = self
78            .cos
79            .get_object(catalog_ref.num)?
80            .ok_or_else(|| FolioError::InvalidObject("Catalog not found".into()))?
81            .clone();
82
83        catalog
84            .dict_get(b"Pages")
85            .and_then(|o| o.as_reference())
86            .ok_or_else(|| FolioError::InvalidObject("Missing /Pages in catalog".into()))
87    }
88
89    /// Get the number of pages in the document.
90    pub fn page_count(&mut self) -> Result<u32> {
91        let pages_ref = self.pages_ref()?;
92        let pages = self
93            .cos
94            .get_object(pages_ref.num)?
95            .ok_or_else(|| FolioError::InvalidObject("Pages dict not found".into()))?
96            .clone();
97
98        pages
99            .dict_get_i64(b"Count")
100            .map(|c| c as u32)
101            .ok_or_else(|| FolioError::InvalidObject("Missing /Count in Pages".into()))
102    }
103
104    /// Get a page by 1-based index.
105    pub fn get_page(&mut self, page_num: u32) -> Result<Page> {
106        if page_num == 0 {
107            return Err(FolioError::InvalidArgument(
108                "Page numbers are 1-based".into(),
109            ));
110        }
111
112        let page_refs = self.collect_page_refs()?;
113        let index = (page_num - 1) as usize;
114
115        if index >= page_refs.len() {
116            return Err(FolioError::InvalidArgument(format!(
117                "Page {} out of range (document has {} pages)",
118                page_num,
119                page_refs.len()
120            )));
121        }
122
123        let page_ref = page_refs[index];
124        let page_obj = self
125            .cos
126            .get_object(page_ref.num)?
127            .ok_or_else(|| {
128                FolioError::InvalidObject(format!("Page object {} not found", page_ref.num))
129            })?
130            .clone();
131
132        // Resolve inherited attributes from parent Pages nodes.
133        // Per PDF spec §7.7.3.4, these keys are inheritable:
134        // MediaBox, CropBox, Rotate, Resources
135        let page_obj = self.resolve_inherited_attrs(page_obj)?;
136
137        Ok(Page::new(page_ref, page_obj, page_num))
138    }
139
140    /// Resolve inherited attributes by walking up the /Parent chain.
141    ///
142    /// PDF spec §7.7.3.4: MediaBox, CropBox, Rotate, and Resources
143    /// are inheritable — if not present on the page dict, they are
144    /// inherited from the nearest ancestor Pages node that defines them.
145    fn resolve_inherited_attrs(&mut self, page_obj: PdfObject) -> Result<PdfObject> {
146        const INHERITABLE: &[&[u8]] = &[b"MediaBox", b"CropBox", b"Rotate", b"Resources"];
147
148        let mut dict = match page_obj.as_dict() {
149            Some(d) => d.clone(),
150            None => return Ok(page_obj),
151        };
152
153        // Check which keys are missing
154        let missing: Vec<&[u8]> = INHERITABLE
155            .iter()
156            .filter(|&&key| !dict.contains_key(key))
157            .copied()
158            .collect();
159
160        if missing.is_empty() {
161            return Ok(page_obj);
162        }
163
164        // Walk up the /Parent chain
165        let mut parent_ref = dict
166            .get(b"Parent".as_slice())
167            .and_then(|o| o.as_reference());
168        let mut visited = std::collections::HashSet::new();
169
170        while let Some(pref) = parent_ref {
171            if visited.contains(&pref.num) {
172                break; // prevent cycles
173            }
174            visited.insert(pref.num);
175
176            let parent = match self.cos.get_object(pref.num)? {
177                Some(obj) => obj.clone(),
178                None => break,
179            };
180
181            let parent_dict = match parent.as_dict() {
182                Some(d) => d,
183                None => break,
184            };
185
186            // Copy missing inheritable keys from this ancestor
187            for &key in &missing {
188                if !dict.contains_key(key) {
189                    if let Some(value) = parent_dict.get(key) {
190                        dict.insert(key.to_vec(), value.clone());
191                    }
192                }
193            }
194
195            // If all keys are now resolved, stop
196            if INHERITABLE.iter().all(|&key| dict.contains_key(key)) {
197                break;
198            }
199
200            // Continue up the chain
201            parent_ref = parent_dict
202                .get(b"Parent".as_slice())
203                .and_then(|o| o.as_reference());
204        }
205
206        Ok(PdfObject::Dict(dict))
207    }
208
209    /// Collect all page object references by walking the page tree.
210    fn collect_page_refs(&mut self) -> Result<Vec<ObjectId>> {
211        let pages_ref = self.pages_ref()?;
212        let mut result = Vec::new();
213        self.collect_pages_recursive(pages_ref, &mut result)?;
214        Ok(result)
215    }
216
217    fn collect_pages_recursive(
218        &mut self,
219        node_ref: ObjectId,
220        result: &mut Vec<ObjectId>,
221    ) -> Result<()> {
222        let node = self
223            .cos
224            .get_object(node_ref.num)?
225            .ok_or_else(|| {
226                FolioError::InvalidObject(format!("Page tree node {} not found", node_ref.num))
227            })?
228            .clone();
229
230        let type_name = node.dict_get_name(b"Type").unwrap_or(b"");
231
232        match type_name {
233            b"Pages" => {
234                // Intermediate node — recurse into Kids
235                if let Some(kids) = node.dict_get(b"Kids").and_then(|o| o.as_array()) {
236                    for kid in kids {
237                        if let Some(kid_ref) = kid.as_reference() {
238                            self.collect_pages_recursive(kid_ref, result)?;
239                        }
240                    }
241                }
242            }
243            b"Page" | _ => {
244                // Leaf node (a page)
245                result.push(node_ref);
246            }
247        }
248
249        Ok(())
250    }
251
252    /// Create a new page with the given media box and add it to the document.
253    pub fn create_page(&mut self, media_box: Rect) -> Result<u32> {
254        let pages_ref = self.pages_ref()?;
255
256        // Create the page object
257        let mut page_dict = IndexMap::new();
258        page_dict.insert(b"Type".to_vec(), PdfObject::Name(b"Page".to_vec()));
259        page_dict.insert(b"Parent".to_vec(), PdfObject::Reference(pages_ref));
260        page_dict.insert(
261            b"MediaBox".to_vec(),
262            PdfObject::Array(vec![
263                PdfObject::Real(media_box.x1),
264                PdfObject::Real(media_box.y1),
265                PdfObject::Real(media_box.x2),
266                PdfObject::Real(media_box.y2),
267            ]),
268        );
269        let page_id = self.cos.create_indirect(PdfObject::Dict(page_dict));
270
271        // Add to Pages Kids array and increment Count
272        let pages = self
273            .cos
274            .get_object(pages_ref.num)?
275            .ok_or_else(|| FolioError::InvalidObject("Pages not found".into()))?
276            .clone();
277
278        let mut pages_dict = pages.as_dict().cloned().unwrap_or_default();
279
280        // Update Kids
281        let mut kids = pages_dict
282            .get(b"Kids".as_slice())
283            .and_then(|o| o.as_array())
284            .map(|a| a.to_vec())
285            .unwrap_or_default();
286        kids.push(PdfObject::Reference(page_id));
287        pages_dict.insert(b"Kids".to_vec(), PdfObject::Array(kids.clone()));
288
289        // Update Count
290        pages_dict.insert(b"Count".to_vec(), PdfObject::Integer(kids.len() as i64));
291
292        self.cos
293            .update_object(pages_ref.num, PdfObject::Dict(pages_dict));
294
295        Ok(kids.len() as u32)
296    }
297
298    /// Get document info (title, author, etc.).
299    pub fn doc_info(&mut self) -> Result<DocInfo> {
300        let info_ref = self
301            .cos
302            .trailer()
303            .get(b"Info".as_slice())
304            .and_then(|o| o.as_reference());
305
306        match info_ref {
307            Some(id) => {
308                let obj = self
309                    .cos
310                    .get_object(id.num)?
311                    .cloned()
312                    .unwrap_or(PdfObject::Null);
313                Ok(DocInfo::from_dict(
314                    obj.as_dict().cloned().unwrap_or_default(),
315                ))
316            }
317            None => Ok(DocInfo::default()),
318        }
319    }
320
    /// Check if the document has been modified.
    ///
    /// Returns whatever the underlying `CosDoc::is_modified` reports.
    pub fn is_modified(&self) -> bool {
        self.cos.is_modified()
    }
325
    /// Save the document to bytes.
    ///
    /// Delegates to `CosDoc::save_to_bytes`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `CosDoc::save_to_bytes`.
    pub fn save_to_bytes(&mut self) -> Result<Vec<u8>> {
        self.cos.save_to_bytes()
    }
330
    /// Save the document to the file at `path`.
    ///
    /// Delegates to `CosDoc::save_to_file`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `CosDoc::save_to_file`.
    pub fn save(&mut self, path: &str) -> Result<()> {
        self.cos.save_to_file(path)
    }
335}