Skip to main content

fop_render/pdf/
outline.rs

1//! PDF outline (bookmarks) extraction from FO tree
2
3use super::document::{PdfOutline, PdfOutlineItem};
4use fop_core::{FoArena, FoNodeData, NodeId};
5use fop_types::Result;
6
7/// Extract PDF outline from FO tree
8pub fn extract_outline_from_fo_tree(fo_tree: &FoArena) -> Result<Option<PdfOutline>> {
9    // Find the bookmark-tree element
10    if let Some((root_id, _)) = fo_tree.root() {
11        if let Some(bookmark_tree_id) = find_bookmark_tree(fo_tree, root_id) {
12            let items = extract_outline_items(fo_tree, bookmark_tree_id)?;
13            if !items.is_empty() {
14                return Ok(Some(PdfOutline { items }));
15            }
16        }
17    }
18
19    Ok(None)
20}
21
22/// Find the bookmark-tree node in the FO tree
23fn find_bookmark_tree(fo_tree: &FoArena, node_id: NodeId) -> Option<NodeId> {
24    let node = fo_tree.get(node_id)?;
25
26    // Check if this is a bookmark-tree
27    if matches!(node.data, FoNodeData::BookmarkTree { .. }) {
28        return Some(node_id);
29    }
30
31    // Search children
32    for child_id in fo_tree.children(node_id) {
33        if let Some(result) = find_bookmark_tree(fo_tree, child_id) {
34            return Some(result);
35        }
36    }
37
38    None
39}
40
41/// Extract outline items from bookmark-tree children
42fn extract_outline_items(fo_tree: &FoArena, parent_id: NodeId) -> Result<Vec<PdfOutlineItem>> {
43    let mut items = Vec::new();
44
45    for child_id in fo_tree.children(parent_id) {
46        if let Some(node) = fo_tree.get(child_id) {
47            if let FoNodeData::Bookmark {
48                internal_destination,
49                external_destination,
50                ..
51            } = &node.data
52            {
53                // Extract bookmark title
54                let title = extract_bookmark_title(fo_tree, child_id)?;
55
56                // For now, map internal destinations to page index 0
57                // In a real implementation, we'd resolve the destination to an actual page
58                let page_index = if internal_destination.is_some() {
59                    Some(0)
60                } else {
61                    None
62                };
63
64                // Extract nested bookmarks
65                let children = extract_nested_bookmarks(fo_tree, child_id)?;
66
67                items.push(PdfOutlineItem {
68                    title,
69                    page_index,
70                    external_destination: external_destination.clone(),
71                    children,
72                });
73            }
74        }
75    }
76
77    Ok(items)
78}
79
80/// Extract nested bookmarks (child fo:bookmark elements)
81fn extract_nested_bookmarks(fo_tree: &FoArena, bookmark_id: NodeId) -> Result<Vec<PdfOutlineItem>> {
82    let mut items = Vec::new();
83
84    for child_id in fo_tree.children(bookmark_id) {
85        if let Some(node) = fo_tree.get(child_id) {
86            // Only process child bookmark elements, not bookmark-title
87            if let FoNodeData::Bookmark {
88                internal_destination,
89                external_destination,
90                ..
91            } = &node.data
92            {
93                let title = extract_bookmark_title(fo_tree, child_id)?;
94                let page_index = if internal_destination.is_some() {
95                    Some(0)
96                } else {
97                    None
98                };
99
100                // Recursively extract nested bookmarks
101                let children = extract_nested_bookmarks(fo_tree, child_id)?;
102
103                items.push(PdfOutlineItem {
104                    title,
105                    page_index,
106                    external_destination: external_destination.clone(),
107                    children,
108                });
109            }
110        }
111    }
112
113    Ok(items)
114}
115
116/// Extract the title text from a bookmark's bookmark-title child
117fn extract_bookmark_title(fo_tree: &FoArena, bookmark_id: NodeId) -> Result<String> {
118    let mut title = String::new();
119
120    for child_id in fo_tree.children(bookmark_id) {
121        if let Some(node) = fo_tree.get(child_id) {
122            if matches!(node.data, FoNodeData::BookmarkTitle { .. }) {
123                // Extract text from bookmark-title children
124                title = extract_text_content(fo_tree, child_id);
125                break;
126            }
127        }
128    }
129
130    if title.is_empty() {
131        title = "Untitled".to_string();
132    }
133
134    Ok(title)
135}
136
137/// Extract text content from a node and its children
138fn extract_text_content(fo_tree: &FoArena, node_id: NodeId) -> String {
139    let mut text = String::new();
140
141    for child_id in fo_tree.children(node_id) {
142        if let Some(node) = fo_tree.get(child_id) {
143            if let FoNodeData::Text(content) = &node.data {
144                text.push_str(content);
145            }
146        }
147    }
148
149    text
150}
151
#[cfg(test)]
mod tests {
    use super::*;
    use fop_core::FoTreeBuilder;
    use std::io::Cursor;

    /// Parse an FO document and run outline extraction on the resulting tree.
    fn outline_for(xml: &'static str) -> Option<PdfOutline> {
        let builder = FoTreeBuilder::new();
        let fo_tree = builder
            .parse(Cursor::new(xml))
            .expect("test: should succeed");
        extract_outline_from_fo_tree(&fo_tree).expect("test: should succeed")
    }

    /// Titles of the given outline items, in order.
    fn titles(items: &[PdfOutlineItem]) -> Vec<&str> {
        items.iter().map(|item| item.title.as_str()).collect()
    }

    /// Two sibling bookmarks become two top-level items in document order.
    #[test]
    fn test_extract_simple_outline() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
            <fo:region-body/>
        </fo:simple-page-master>
    </fo:layout-master-set>
    <fo:bookmark-tree>
        <fo:bookmark internal-destination="ch1">
            <fo:bookmark-title>Chapter 1</fo:bookmark-title>
        </fo:bookmark>
        <fo:bookmark internal-destination="ch2">
            <fo:bookmark-title>Chapter 2</fo:bookmark-title>
        </fo:bookmark>
    </fo:bookmark-tree>
</fo:root>"#;

        let outline = outline_for(xml).expect("test: should succeed");

        assert_eq!(titles(&outline.items), ["Chapter 1", "Chapter 2"]);
    }

    /// Bookmarks nested inside a bookmark end up as that item's children.
    #[test]
    fn test_extract_nested_outline() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
            <fo:region-body/>
        </fo:simple-page-master>
    </fo:layout-master-set>
    <fo:bookmark-tree>
        <fo:bookmark internal-destination="ch1">
            <fo:bookmark-title>Chapter 1</fo:bookmark-title>
            <fo:bookmark internal-destination="s1.1">
                <fo:bookmark-title>Section 1.1</fo:bookmark-title>
            </fo:bookmark>
            <fo:bookmark internal-destination="s1.2">
                <fo:bookmark-title>Section 1.2</fo:bookmark-title>
            </fo:bookmark>
        </fo:bookmark>
    </fo:bookmark-tree>
</fo:root>"#;

        let outline = outline_for(xml).expect("test: should succeed");

        assert_eq!(titles(&outline.items), ["Chapter 1"]);
        assert_eq!(
            titles(&outline.items[0].children),
            ["Section 1.1", "Section 1.2"]
        );
    }

    /// A document without a bookmark-tree yields no outline at all.
    #[test]
    fn test_no_outline() {
        let xml = r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
            <fo:region-body/>
        </fo:simple-page-master>
    </fo:layout-master-set>
</fo:root>"#;

        assert!(outline_for(xml).is_none());
    }
}
246
#[cfg(test)]
mod tests_outline_comprehensive {
    use super::super::document::{PdfDocument, PdfOutline, PdfOutlineItem, PdfPage};
    use super::*;
    use fop_core::FoTreeBuilder;
    use fop_types::Length;
    use std::io::Cursor;

    /// Parse an FO document into an arena, panicking on malformed input.
    fn parse_fo(xml: &'static str) -> fop_core::FoArena<'static> {
        FoTreeBuilder::new()
            .parse(Cursor::new(xml))
            .expect("test: should succeed")
    }

    /// Parse an FO document and return the outline it must contain.
    fn outline_of(xml: &'static str) -> PdfOutline {
        let fo = parse_fo(xml);
        extract_outline_from_fo_tree(&fo)
            .expect("test: should succeed")
            .expect("test: should succeed")
    }

    /// Outline item with the given title that targets page index 0 and has
    /// neither an external destination nor children.
    fn leaf(title: &str) -> PdfOutlineItem {
        PdfOutlineItem {
            title: title.to_string(),
            page_index: Some(0),
            external_destination: None,
            children: vec![],
        }
    }

    /// Serialize a document holding the given outline items plus one
    /// 210mm x 297mm page, and return the bytes as lossily decoded text.
    fn render_with_outline(items: Vec<PdfOutlineItem>) -> String {
        let mut doc = PdfDocument::new();
        doc.set_outline(PdfOutline { items });
        doc.add_page(PdfPage::new(Length::from_mm(210.0), Length::from_mm(297.0)));
        let bytes = doc.to_bytes().expect("test: should succeed");
        String::from_utf8_lossy(&bytes).into_owned()
    }

    // ── PdfOutlineItem construction ───────────────────────────────────────────

    #[test]
    fn test_outline_item_title_and_page_ref() {
        let item = leaf("Introduction");

        assert_eq!(item.title, "Introduction");
        assert_eq!(item.page_index, Some(0));
        assert!(item.external_destination.is_none());
        assert!(item.children.is_empty());
    }

    #[test]
    fn test_outline_item_external_destination() {
        let item = PdfOutlineItem {
            page_index: None,
            external_destination: Some("https://example.com".to_string()),
            ..leaf("External Link")
        };

        assert!(item.page_index.is_none());
        assert_eq!(
            item.external_destination.as_deref(),
            Some("https://example.com")
        );
    }

    #[test]
    fn test_outline_item_clone() {
        let original = PdfOutlineItem {
            page_index: Some(2),
            ..leaf("Chapter")
        };

        let cloned = original.clone();

        assert_eq!(cloned.title, "Chapter");
        assert_eq!(cloned.page_index, Some(2));
    }

    // ── Sibling outline items ─────────────────────────────────────────────────

    #[test]
    fn test_sibling_outline_items_three_siblings() {
        let outline = outline_of(
            r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
            <fo:region-body/>
        </fo:simple-page-master>
    </fo:layout-master-set>
    <fo:bookmark-tree>
        <fo:bookmark internal-destination="p1">
            <fo:bookmark-title>Part 1</fo:bookmark-title>
        </fo:bookmark>
        <fo:bookmark internal-destination="p2">
            <fo:bookmark-title>Part 2</fo:bookmark-title>
        </fo:bookmark>
        <fo:bookmark internal-destination="p3">
            <fo:bookmark-title>Part 3</fo:bookmark-title>
        </fo:bookmark>
    </fo:bookmark-tree>
</fo:root>"#,
        );

        let titles: Vec<&str> = outline
            .items
            .iter()
            .map(|item| item.title.as_str())
            .collect();
        assert_eq!(titles, ["Part 1", "Part 2", "Part 3"]);
    }

    // ── Nested children (3 levels deep) ──────────────────────────────────────

    #[test]
    fn test_three_level_nested_outline() {
        let outline = outline_of(
            r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
            <fo:region-body/>
        </fo:simple-page-master>
    </fo:layout-master-set>
    <fo:bookmark-tree>
        <fo:bookmark internal-destination="ch1">
            <fo:bookmark-title>Chapter 1</fo:bookmark-title>
            <fo:bookmark internal-destination="s1">
                <fo:bookmark-title>Section 1</fo:bookmark-title>
                <fo:bookmark internal-destination="ss1">
                    <fo:bookmark-title>Subsection 1</fo:bookmark-title>
                </fo:bookmark>
            </fo:bookmark>
        </fo:bookmark>
    </fo:bookmark-tree>
</fo:root>"#,
        );

        assert_eq!(outline.items.len(), 1);
        let chapter = &outline.items[0];
        assert_eq!(chapter.title, "Chapter 1");

        assert_eq!(chapter.children.len(), 1);
        let section = &chapter.children[0];
        assert_eq!(section.title, "Section 1");

        assert_eq!(section.children.len(), 1);
        assert_eq!(section.children[0].title, "Subsection 1");
    }

    // ── /Outlines dict structure in generated PDF ─────────────────────────────

    #[test]
    fn test_outlines_dict_present_in_pdf_bytes() {
        let pdf = render_with_outline(vec![leaf("Only Chapter")]);

        assert!(pdf.contains("/Outlines"));
    }

    #[test]
    fn test_catalog_references_outlines() {
        let pdf = render_with_outline(vec![leaf("Intro")]);

        // The catalog must reference the outlines object; this pins the
        // object number the writer is expected to assign for this document.
        assert!(pdf.contains("/Outlines 4 0 R"));
    }

    #[test]
    fn test_outline_title_appears_in_pdf_bytes() {
        let pdf = render_with_outline(vec![leaf("Alpha Chapter"), leaf("Beta Chapter")]);

        assert!(pdf.contains("Alpha Chapter"));
        assert!(pdf.contains("Beta Chapter"));
    }

    // ── Count field: total descendants ───────────────────────────────────────

    #[test]
    fn test_outline_count_reflected_in_pdf() {
        // Two top-level items are expected to show up as /Count 2.
        let pdf = render_with_outline(vec![leaf("Item A"), leaf("Item B")]);

        assert!(pdf.contains("/Count 2"));
    }

    // ── No outline → no /Outlines in catalog ─────────────────────────────────

    #[test]
    fn test_no_outline_no_outlines_in_catalog() {
        // No outline set and no page added.
        let doc = PdfDocument::new();
        let bytes = doc.to_bytes().expect("test: should succeed");
        let pdf = String::from_utf8_lossy(&bytes);

        assert!(!pdf.contains("/Outlines 4 0 R"));
    }

    // ── PdfOutline struct ─────────────────────────────────────────────────────

    #[test]
    fn test_pdf_outline_items_count() {
        let outline = PdfOutline {
            items: vec![
                leaf("A"),
                PdfOutlineItem {
                    page_index: Some(1),
                    ..leaf("B")
                },
                PdfOutlineItem {
                    page_index: Some(2),
                    ..leaf("C")
                },
            ],
        };

        assert_eq!(outline.items.len(), 3);
    }

    #[test]
    fn test_bookmark_untitled_defaults_to_untitled() {
        // A bookmark without a bookmark-title child should get "Untitled".
        let outline = outline_of(
            r#"<?xml version="1.0"?>
<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format">
    <fo:layout-master-set>
        <fo:simple-page-master master-name="A4">
            <fo:region-body/>
        </fo:simple-page-master>
    </fo:layout-master-set>
    <fo:bookmark-tree>
        <fo:bookmark internal-destination="x">
        </fo:bookmark>
    </fo:bookmark-tree>
</fo:root>"#,
        );

        assert_eq!(outline.items[0].title, "Untitled");
    }
}
531}