// pdfluent_lopdf/toc.rs

1use indexmap::IndexMap;
2
3#[cfg(feature = "serde")]
4use serde::{Deserialize, Serialize};
5
6use super::{Document, Error, Object, Outline, Result};
7
/// One entry of a document's table of contents.
///
/// Entries are produced by `Document::get_toc` from the document outline.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TocType {
    /// Nesting depth of the entry; `get_toc` starts counting at `1` for
    /// top-level outline items and adds one per nested sub-outline.
    pub level: usize,
    /// Decoded, human-readable title of the entry.
    pub title: String,
    /// Page number the entry points to, as reported by `Document::get_pages`.
    pub page: usize,
}
15
16#[allow(dead_code)]
17#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
18#[derive(Debug, Clone, Default)]
19pub struct Toc {
20    pub toc: Vec<TocType>,
21    pub errors: Vec<String>,
22}
23
24impl Toc {
25    pub fn new() -> Self {
26        Toc {
27            toc: Vec::new(),
28            errors: Vec::new(),
29        }
30    }
31}
32
33#[derive(Debug, Clone)]
34pub struct Destination {
35    map: IndexMap<Vec<u8>, Object>,
36}
37
38#[allow(dead_code)]
39impl Destination {
40    pub fn new(title: Object, page: Object, typ: Object) -> Self {
41        let mut map = IndexMap::new();
42        map.insert(b"Title".to_vec(), title);
43        map.insert(b"Page".to_vec(), page);
44        map.insert(b"Type".to_vec(), typ);
45        Destination { map }
46    }
47
48    pub fn set(&mut self, key: Vec<u8>, value: Object) {
49        self.map.insert(key, value);
50    }
51
52    pub fn title(&self) -> Option<&Object> {
53        self.map.get(b"Title".as_slice())
54    }
55
56    pub fn page(&self) -> Option<&Object> {
57        self.map.get(b"Page".as_slice())
58    }
59}
60
61type OutlinePageIds = IndexMap<Vec<u8>, ((u32, u16), usize, usize)>;
62
63fn setup_outline_page_ids<'a>(
64    outlines: &'a Vec<Outline>,
65    result: &mut OutlinePageIds,
66    level: usize,
67) -> Result<&'a Vec<Outline>> {
68    for outline in outlines.iter() {
69        match outline {
70            Outline::Destination(destination) => {
71                result.insert(
72                    destination.title()?.as_str()?.to_vec(),
73                    (destination.page()?.as_reference()?, result.len(), level),
74                );
75            }
76            Outline::SubOutlines(sub_outlines) => {
77                setup_outline_page_ids(sub_outlines, result, level + 1)?;
78            }
79        }
80    }
81    Ok(outlines)
82}
83
84impl Document {
85    fn setup_page_id_to_num(&self) -> IndexMap<(u32, u16), u32> {
86        let mut result = IndexMap::new();
87        for (page_num, page_id) in self.get_pages() {
88            result.insert(page_id, page_num);
89        }
90        result
91    }
92
93    pub fn get_toc(&self) -> Result<Toc> {
94        let mut toc: Toc = Toc {
95            toc: Vec::new(),
96            errors: Vec::new(),
97        };
98        let mut named_destinations = IndexMap::new();
99
100        let Some(outlines) = self.get_outlines(None, None, &mut named_destinations)? else {
101            return Err(Error::NoOutline);
102        };
103
104        let mut outline_page_ids = IndexMap::new();
105        setup_outline_page_ids(&outlines, &mut outline_page_ids, 1)?;
106        let page_id_to_page_numbers = self.setup_page_id_to_num();
107        for (title, (page_id, _page_idx, level)) in outline_page_ids {
108            if let Some(page_num) = page_id_to_page_numbers.get(&page_id) {
109                let s;
110                if title.len() < 2 {
111                    s = String::from_utf8_lossy(&title).to_string();
112                } else if title[0] == 0xfe && title[1] == 0xff {
113                    if title.len() & 1 != 0 {
114                        toc.errors.push(format!(
115                            "Title encoded UTF16_BE {title:?} has invalid length!"
116                        ));
117                        continue;
118                    }
119                    let t16: Vec<u16> = title
120                        .chunks(2)
121                        .skip(1)
122                        .map(|x| ((x[0] as u16) << 8) | x[1] as u16)
123                        .collect();
124                    s = String::from_utf16_lossy(&t16);
125                } else if title[0] == 0xff && title[1] == 0xfe {
126                    if title.len() & 1 != 0 {
127                        toc.errors.push(format!(
128                            "Title encoded UTF16_LE {title:?} has invalid length!"
129                        ));
130                        continue;
131                    }
132                    let t16: Vec<u16> = title
133                        .chunks(2)
134                        .skip(1)
135                        .map(|x| ((x[1] as u16) << 8) | x[0] as u16)
136                        .collect();
137                    s = String::from_utf16_lossy(&t16);
138                } else {
139                    s = String::from_utf8_lossy(&title).to_string();
140                }
141                toc.toc.push(TocType {
142                    level,
143                    title: s,
144                    page: *page_num as usize,
145                });
146            }
147        }
148        Ok(toc)
149    }
150}