use lopdf::{Document, Object, ObjectId};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
/// One entry of a PDF document outline, together with its nested entries.
///
/// Values are produced by [`extract_bookmarks`]; the type is serializable via
/// `serde` in both directions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Bookmark {
/// Title of the outline item; empty when the item has no string `Title` entry.
pub title: String,
/// Nesting depth: `0` for top-level entries, incremented by one per level of children.
pub level: u32,
/// Page number the entry points to, as keyed by `Document::get_pages`;
/// `None` when the destination could not be matched to a page.
pub page_number: Option<u32>,
/// Entries nested directly under this one, in document order.
pub children: Vec<Bookmark>,
}
/// Extracts the outline (bookmark) tree of `doc`.
///
/// Returns an empty vector when the catalog cannot be read, when it has no
/// `Outlines` entry, when that entry is not a dictionary, or when the outline
/// dictionary's `First` entry is missing or is not an indirect reference.
/// Top-level entries are reported with `level == 0`.
pub fn extract_bookmarks(doc: &Document) -> Vec<Bookmark> {
    // Walk catalog -> Outlines -> First, bailing out with `None` at the
    // first link of the chain that is missing or has the wrong type.
    let first_item = doc
        .catalog()
        .ok()
        .and_then(|catalog| catalog.get(b"Outlines").ok())
        .and_then(|outlines| resolve(doc, outlines).as_dict().ok())
        .and_then(|outline_root| outline_root.get(b"First").ok())
        .and_then(|first| match first {
            Object::Reference(id) => Some(*id),
            _ => None,
        });

    match first_item {
        Some(root_id) => {
            let pages = doc.get_pages();
            // Shared across the whole traversal so no outline item is
            // visited twice.
            let mut seen = HashSet::new();
            read_outline_items(doc, root_id, 0, &pages, &mut seen)
        }
        None => Vec::new(),
    }
}
fn read_outline_items(
doc: &Document,
first_id: ObjectId,
level: u32,
page_ids: &std::collections::BTreeMap<u32, ObjectId>,
visited: &mut HashSet<ObjectId>,
) -> Vec<Bookmark> {
let mut bookmarks = Vec::new();
let mut current_id = Some(first_id);
while let Some(obj_id) = current_id {
if !visited.insert(obj_id) {
break;
}
let dict = match doc.get_object(obj_id).and_then(|o| o.as_dict()) {
Ok(d) => d,
Err(_) => break,
};
let title = match dict.get(b"Title") {
Ok(Object::String(bytes, _)) => String::from_utf8_lossy(bytes).to_string(),
_ => String::new(),
};
let page_number = resolve_bookmark_page(doc, dict, page_ids);
let children = match dict.get(b"First") {
Ok(Object::Reference(child_id)) => {
read_outline_items(doc, *child_id, level + 1, page_ids, visited)
}
_ => Vec::new(),
};
bookmarks.push(Bookmark {
title,
level,
page_number,
children,
});
current_id = match dict.get(b"Next") {
Ok(Object::Reference(next_id)) => Some(*next_id),
_ => None,
};
}
bookmarks
}
/// Determines the page number an outline item points to.
///
/// When the item has a `Dest` entry, that destination alone decides the
/// result. Otherwise the `D` entry of the item's `A` (action) dictionary is
/// used. Returns `None` when neither route leads to a page.
fn resolve_bookmark_page(
    doc: &Document,
    dict: &lopdf::Dictionary,
    page_ids: &std::collections::BTreeMap<u32, ObjectId>,
) -> Option<u32> {
    match dict.get(b"Dest") {
        Ok(direct_dest) => page_from_dest(doc, direct_dest, page_ids),
        Err(_) => {
            // No direct destination: look inside the action dictionary,
            // which may itself be stored behind a reference.
            let action = resolve(doc, dict.get(b"A").ok()?);
            let action_dest = action.as_dict().ok()?.get(b"D").ok()?;
            page_from_dest(doc, action_dest, page_ids)
        }
    }
}
/// Maps a destination object to a page number.
///
/// The destination (after following one reference) must be a non-empty array
/// whose first element is a reference to a page object; that reference is
/// then looked up among the values of `page_ids`. Returns `None` for any
/// other shape, or when no page has the referenced object id.
fn page_from_dest(
    doc: &Document,
    dest: &Object,
    page_ids: &std::collections::BTreeMap<u32, ObjectId>,
) -> Option<u32> {
    let elements = resolve(doc, dest).as_array().ok()?;
    let target = match elements.first()? {
        Object::Reference(id) => *id,
        _ => return None,
    };
    // Reverse lookup: find the page number whose object id is the target.
    page_ids.iter().find_map(|(number, id)| {
        if *id == target {
            Some(*number)
        } else {
            None
        }
    })
}
/// Follows a single level of indirection.
///
/// If `obj` is a reference, returns the object it refers to in `doc`, or
/// `obj` itself when the lookup fails. Any other object is returned as is.
fn resolve<'a>(doc: &'a Document, obj: &'a Object) -> &'a Object {
    if let Object::Reference(target) = obj {
        doc.get_object(*target).unwrap_or(obj)
    } else {
        obj
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Extraction on a brand-new `Document` must yield no bookmarks.
    #[test]
    fn test_empty_document_no_bookmarks() {
        assert!(extract_bookmarks(&Document::new()).is_empty());
    }

    /// A hand-built bookmark tree keeps its title, children and levels.
    #[test]
    fn test_bookmark_struct() {
        let section = Bookmark {
            title: "Section 1.1".to_string(),
            level: 1,
            page_number: Some(3),
            children: vec![],
        };
        let chapter = Bookmark {
            title: "Chapter 1".to_string(),
            level: 0,
            page_number: Some(1),
            children: vec![section],
        };

        assert_eq!(chapter.title, "Chapter 1");
        assert_eq!(chapter.children.len(), 1);
        assert_eq!(chapter.children[0].level, 1);
    }

    /// The visited set reports a repeated id, which is what the outline
    /// traversal relies on to stop at cycles.
    #[test]
    fn test_visited_prevents_infinite_loop() {
        let mut seen = HashSet::new();
        let object_id: ObjectId = (1, 0);

        let first_time = seen.insert(object_id);
        let second_time = seen.insert(object_id);

        assert!(first_time);
        assert!(!second_time);
    }
}