rpdfium-doc 7676.6.4

Document-level features for rpdfium
Documentation
// Derived from PDFium's cpdf_bookmarktree.h/cpp
// Original: Copyright 2014 The PDFium Authors
// Licensed under BSD-3-Clause / Apache-2.0
// See pdfium-upstream/LICENSE for the original license.

//! Bookmark tree traversal (`CPDF_BookmarkTree`).
//!
//! Iteratively traverses the PDF outline (bookmark) tree anchored at
//! `/Outlines` in the document catalog (ISO 32000-2 section 12.3.3).
//! All traversal is **iterative** (explicit `Vec` stack) for WASM safety.
//!
//! Corresponds to upstream `CPDF_BookmarkTree`.

use std::collections::HashMap;

use rpdfium_core::{Name, PdfSource};
use rpdfium_parser::{Object, ObjectStore};

use crate::bookmark::{Bookmark, parse_single_bookmark};
use crate::error::{DocError, DocResult};

/// Maximum depth for bookmark tree traversal.
const MAX_TREE_DEPTH: usize = 64;

/// Maximum total bookmark entries to parse (safety limit).
const MAX_TOTAL_BOOKMARKS: usize = 10_000;

/// Parse the bookmark tree from the document catalog.
///
/// Returns an empty Vec if no `/Outlines` entry is present.
/// All traversal is iterative to satisfy WASM stack constraints.
pub fn parse_bookmarks<S: PdfSource>(
    catalog: &Object,
    store: &ObjectStore<S>,
) -> DocResult<Vec<Bookmark>> {
    let catalog_dict = store
        .deep_resolve(catalog)
        .map_err(|e| DocError::Parser(e.to_string()))?
        .as_dict()
        .ok_or(DocError::UnexpectedType)?;

    // Get /Outlines entry
    let outlines_obj = match catalog_dict.get(&Name::outlines()) {
        Some(obj) => store
            .deep_resolve(obj)
            .map_err(|e| DocError::Parser(e.to_string()))?,
        None => return Ok(Vec::new()),
    };

    let outlines_dict = match outlines_obj.as_dict() {
        Some(d) => d,
        None => return Ok(Vec::new()),
    };

    // Get /First child of the outlines root
    let first_obj = match outlines_dict.get(&Name::first()) {
        Some(obj) => obj,
        None => return Ok(Vec::new()),
    };

    // Iterative DFS tree construction.
    // We build a flat list of (depth, Bookmark) in document order, then
    // reconstruct the tree structure.
    //
    // Stack items are processed LIFO, so we push /Next BEFORE /First
    // to ensure children are visited before the next sibling (DFS order).
    let mut flat: Vec<(usize, Bookmark)> = Vec::new();

    struct StackItem<'a> {
        obj: &'a Object,
        depth: usize,
    }

    let mut stack: Vec<StackItem<'_>> = vec![StackItem {
        obj: first_obj,
        depth: 0,
    }];

    while let Some(item) = stack.pop() {
        if item.depth > MAX_TREE_DEPTH {
            return Err(DocError::DepthExceeded);
        }

        if flat.len() > MAX_TOTAL_BOOKMARKS {
            // Safety limit on total bookmark count
            break;
        }

        let dict = match resolve_to_dict(item.obj, store) {
            Ok(d) => d,
            Err(_) => continue,
        };

        let bookmark = parse_single_bookmark(dict, store)?;
        flat.push((item.depth, bookmark));

        // Push /Next sibling FIRST (will be processed AFTER child due to LIFO)
        if let Some(next_obj) = dict.get(&Name::next()) {
            stack.push(StackItem {
                obj: next_obj,
                depth: item.depth,
            });
        }

        // Push /First child SECOND (will be processed BEFORE sibling due to LIFO)
        if let Some(child_obj) = dict.get(&Name::first()) {
            stack.push(StackItem {
                obj: child_obj,
                depth: item.depth + 1,
            });
        }
    }

    // Reconstruct tree from flat (depth, Bookmark) list
    build_tree_from_flat(flat)
}

/// Reconstruct a tree from a flat depth-tagged list.
///
/// Uses an iterative index-path approach: we track the path of indices
/// from the root to the current insertion point and navigate to the
/// correct parent for each new node.
fn build_tree_from_flat(flat: Vec<(usize, Bookmark)>) -> DocResult<Vec<Bookmark>> {
    if flat.is_empty() {
        return Ok(Vec::new());
    }

    let mut root: Vec<Bookmark> = Vec::new();
    // path[i] = index of the node at depth i in its parent's children vec.
    // This lets us navigate to the correct parent iteratively.
    let mut path: Vec<usize> = Vec::new();

    for (depth, bookmark) in flat {
        // Truncate the path to the parent's depth
        path.truncate(depth);

        // Navigate to the target container iteratively
        let container = get_children_at_path(&mut root, &path);
        let idx = container.len();
        container.push(bookmark);

        // Record this node's index for potential children
        if path.len() <= depth {
            path.push(idx);
        }
    }

    Ok(root)
}

/// Iteratively navigate to the children vec at the given index path.
fn get_children_at_path<'a>(root: &'a mut Vec<Bookmark>, path: &[usize]) -> &'a mut Vec<Bookmark> {
    let mut current = root;
    for &idx in path {
        current = &mut current[idx].children;
    }
    current
}

/// Resolve an object to a dictionary.
fn resolve_to_dict<'a, S: PdfSource>(
    obj: &'a Object,
    store: &'a ObjectStore<S>,
) -> DocResult<&'a HashMap<Name, Object>> {
    let resolved = store
        .deep_resolve(obj)
        .map_err(|e| DocError::Parser(e.to_string()))?;
    resolved.as_dict().ok_or(DocError::UnexpectedType)
}

#[cfg(test)]
mod tests {
    use super::*;
    use rpdfium_core::PdfString;

    fn build_store() -> ObjectStore<Vec<u8>> {
        let pdf = build_minimal_pdf();
        ObjectStore::open(pdf, rpdfium_core::ParsingMode::Lenient).unwrap()
    }

    fn build_minimal_pdf() -> Vec<u8> {
        let mut pdf = Vec::new();
        pdf.extend_from_slice(b"%PDF-1.4\n");
        let obj1_offset = pdf.len();
        pdf.extend_from_slice(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n");
        let obj2_offset = pdf.len();
        pdf.extend_from_slice(b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n");
        let xref_offset = pdf.len();
        pdf.extend_from_slice(b"xref\n0 3\n");
        pdf.extend_from_slice(b"0000000000 65535 f \r\n");
        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj1_offset).as_bytes());
        pdf.extend_from_slice(format!("{:010} 00000 n \r\n", obj2_offset).as_bytes());
        pdf.extend_from_slice(b"trailer\n<< /Size 3 /Root 1 0 R >>\n");
        pdf.extend_from_slice(format!("startxref\n{}\n%%EOF", xref_offset).as_bytes());
        pdf
    }

    fn str_obj(s: &str) -> Object {
        Object::String(PdfString::from_bytes(s.as_bytes().to_vec()))
    }

    #[test]
    fn test_no_outlines() {
        let store = build_store();
        // Catalog without /Outlines
        let mut dict = HashMap::new();
        dict.insert(Name::r#type(), Object::Name(Name::from("Catalog")));
        let catalog = Object::Dictionary(dict);
        let bookmarks = parse_bookmarks(&catalog, &store).unwrap();
        assert!(bookmarks.is_empty());
    }

    #[test]
    fn test_single_bookmark() {
        let store = build_store();

        let mut bm_dict = HashMap::new();
        bm_dict.insert(Name::title(), str_obj("Chapter 1"));
        bm_dict.insert(Name::count(), Object::Integer(0));

        let mut outlines = HashMap::new();
        outlines.insert(Name::r#type(), Object::Name(Name::from("Outlines")));
        outlines.insert(Name::first(), Object::Dictionary(bm_dict));

        let mut catalog = HashMap::new();
        catalog.insert(Name::r#type(), Object::Name(Name::from("Catalog")));
        catalog.insert(Name::outlines(), Object::Dictionary(outlines));

        let catalog_obj = Object::Dictionary(catalog);
        let bookmarks = parse_bookmarks(&catalog_obj, &store).unwrap();
        assert_eq!(bookmarks.len(), 1);
        assert_eq!(bookmarks[0].title, "Chapter 1");
        assert!(!bookmarks[0].is_open);
    }

    #[test]
    fn test_linked_list_siblings() {
        let store = build_store();

        // Third sibling (no /Next)
        let mut bm3 = HashMap::new();
        bm3.insert(Name::title(), str_obj("Part 3"));

        // Second sibling -> /Next -> bm3
        let mut bm2 = HashMap::new();
        bm2.insert(Name::title(), str_obj("Part 2"));
        bm2.insert(Name::next(), Object::Dictionary(bm3));

        // First sibling -> /Next -> bm2
        let mut bm1 = HashMap::new();
        bm1.insert(Name::title(), str_obj("Part 1"));
        bm1.insert(Name::next(), Object::Dictionary(bm2));

        let mut outlines = HashMap::new();
        outlines.insert(Name::first(), Object::Dictionary(bm1));

        let mut catalog = HashMap::new();
        catalog.insert(Name::outlines(), Object::Dictionary(outlines));

        let catalog_obj = Object::Dictionary(catalog);
        let bookmarks = parse_bookmarks(&catalog_obj, &store).unwrap();
        assert_eq!(bookmarks.len(), 3);
        assert_eq!(bookmarks[0].title, "Part 1");
        assert_eq!(bookmarks[1].title, "Part 2");
        assert_eq!(bookmarks[2].title, "Part 3");
    }

    #[test]
    fn test_nested_bookmarks() {
        let store = build_store();

        // Child bookmark
        let mut child = HashMap::new();
        child.insert(Name::title(), str_obj("Section 1.1"));

        // Parent with /First pointing to child
        let mut parent = HashMap::new();
        parent.insert(Name::title(), str_obj("Chapter 1"));
        parent.insert(Name::first(), Object::Dictionary(child));
        parent.insert(Name::count(), Object::Integer(1)); // open

        let mut outlines = HashMap::new();
        outlines.insert(Name::first(), Object::Dictionary(parent));

        let mut catalog = HashMap::new();
        catalog.insert(Name::outlines(), Object::Dictionary(outlines));

        let catalog_obj = Object::Dictionary(catalog);
        let bookmarks = parse_bookmarks(&catalog_obj, &store).unwrap();
        assert_eq!(bookmarks.len(), 1);
        assert_eq!(bookmarks[0].title, "Chapter 1");
        assert!(bookmarks[0].is_open);
        assert_eq!(bookmarks[0].children.len(), 1);
        assert_eq!(bookmarks[0].children[0].title, "Section 1.1");
    }

    #[test]
    fn test_open_closed_state() {
        let store = build_store();

        let mut open_bm = HashMap::new();
        open_bm.insert(Name::title(), str_obj("Open"));
        open_bm.insert(Name::count(), Object::Integer(3));

        let mut closed_bm = HashMap::new();
        closed_bm.insert(Name::title(), str_obj("Closed"));
        closed_bm.insert(Name::count(), Object::Integer(-2));
        open_bm.insert(Name::next(), Object::Dictionary(closed_bm));

        let mut outlines = HashMap::new();
        outlines.insert(Name::first(), Object::Dictionary(open_bm));

        let mut catalog = HashMap::new();
        catalog.insert(Name::outlines(), Object::Dictionary(outlines));

        let catalog_obj = Object::Dictionary(catalog);
        let bookmarks = parse_bookmarks(&catalog_obj, &store).unwrap();
        assert_eq!(bookmarks.len(), 2);
        assert!(bookmarks[0].is_open); // count > 0
        assert!(!bookmarks[1].is_open); // count < 0
    }

    #[test]
    fn test_bookmark_with_destination() {
        let store = build_store();

        let mut bm = HashMap::new();
        bm.insert(Name::title(), str_obj("Go Here"));
        bm.insert(
            Name::dest(),
            Object::String(PdfString::from_bytes(b"page1".to_vec())),
        );

        let mut outlines = HashMap::new();
        outlines.insert(Name::first(), Object::Dictionary(bm));

        let mut catalog = HashMap::new();
        catalog.insert(Name::outlines(), Object::Dictionary(outlines));

        let catalog_obj = Object::Dictionary(catalog);
        let bookmarks = parse_bookmarks(&catalog_obj, &store).unwrap();
        assert_eq!(bookmarks.len(), 1);
        assert!(bookmarks[0].destination.is_some());
    }

    #[test]
    fn test_bookmark_with_action() {
        let store = build_store();

        let mut action_dict = HashMap::new();
        action_dict.insert(Name::s(), Object::Name(Name::from("URI")));
        action_dict.insert(
            Name::uri(),
            Object::String(PdfString::from_bytes(b"https://example.com".to_vec())),
        );

        let mut bm = HashMap::new();
        bm.insert(Name::title(), str_obj("Link"));
        bm.insert(Name::a(), Object::Dictionary(action_dict));

        let mut outlines = HashMap::new();
        outlines.insert(Name::first(), Object::Dictionary(bm));

        let mut catalog = HashMap::new();
        catalog.insert(Name::outlines(), Object::Dictionary(outlines));

        let catalog_obj = Object::Dictionary(catalog);
        let bookmarks = parse_bookmarks(&catalog_obj, &store).unwrap();
        assert_eq!(bookmarks.len(), 1);
        assert!(bookmarks[0].action.is_some());
    }

    #[test]
    fn test_empty_outlines_dict() {
        let store = build_store();

        let outlines = HashMap::new();

        let mut catalog = HashMap::new();
        catalog.insert(Name::outlines(), Object::Dictionary(outlines));

        let catalog_obj = Object::Dictionary(catalog);
        let bookmarks = parse_bookmarks(&catalog_obj, &store).unwrap();
        assert!(bookmarks.is_empty());
    }
}