use lopdf::{Document, Object, ObjectId};
/// One entry of a document outline (bookmark tree), together with its nested entries.
#[derive(Debug, Clone, PartialEq)]
pub struct Outline {
    /// Text displayed for this entry.
    pub title: String,
    /// Page this entry points at, or `None` when it has no page destination.
    pub page: Option<usize>,
    /// Entries nested directly beneath this one.
    pub children: Vec<Outline>,
}

impl Outline {
    /// Builds a childless entry called `title` that targets `page`.
    pub fn new(title: impl Into<String>, page: usize) -> Self {
        let title = title.into();
        Outline {
            title,
            page: Some(page),
            children: vec![],
        }
    }
}
/// Converts a `pdf_manip` bookmark (and, recursively, its children) into an [`Outline`].
///
/// Only `GoTo` actions carry a page: the bookmark's page number is lowered by one
/// (saturating at zero). Every other action yields `page: None`.
pub(crate) fn from_bookmark(b: &pdf_manip::bookmarks::Bookmark) -> Outline {
    use pdf_manip::bookmarks::BookmarkAction;

    let page = if let BookmarkAction::GoTo { page, .. } = &b.action {
        Some((*page).saturating_sub(1) as usize)
    } else {
        None
    };
    let children: Vec<Outline> = b.children.iter().map(from_bookmark).collect();

    Outline {
        title: b.title.clone(),
        page,
        children,
    }
}
/// Converts an [`Outline`] (and, recursively, its children) into a `pdf_manip` bookmark.
///
/// The outline's page index is raised by one to form the bookmark page number.
/// An outline without a page falls back to page `1`.
///
/// Indices that do not fit in a `u32` are clamped to `u32::MAX` instead of being
/// truncated by an `as` cast, and the `+ 1` saturates rather than overflowing.
pub(crate) fn to_bookmark(o: &Outline) -> pdf_manip::bookmarks::Bookmark {
    let page = o
        .page
        // Clamp before narrowing so the cast below can never drop high bits.
        .map(|p| (p.min(u32::MAX as usize) as u32).saturating_add(1))
        .unwrap_or(1);
    pdf_manip::bookmarks::Bookmark::with_children(
        o.title.clone(),
        page,
        o.children.iter().map(to_bookmark).collect(),
    )
}
/// Summary of a single annotation found on a page.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct AnnotationInfo {
/// Value of the annotation's `Subtype` name; `"Unknown"` when it is missing or not a name.
pub subtype: String,
/// The annotation's `Rect` entry as four numbers, or `None` when it is absent,
/// does not have exactly four elements, or contains a non-numeric element.
pub rect: Option<[f64; 4]>,
/// Text of the annotation's `Contents` entry, when present and a string.
pub contents: Option<String>,
}
/// Reads a PDF numeric object (`Integer` or `Real`) as an `f64`.
///
/// Returns `None` for every other kind of object.
fn obj_as_f64(o: &Object) -> Option<f64> {
    let value = match o {
        Object::Integer(i) => *i as f64,
        Object::Real(r) => *r as f64,
        _ => return None,
    };
    Some(value)
}
/// Decodes a PDF string object into a Rust `String`.
///
/// Returns `None` when `o` is not a string object.
///
/// PDF text strings may be stored as UTF-16BE with a leading `FE FF` byte-order
/// mark, or (PDF 2.0) as UTF-8 with a leading `EF BB BF` mark. Both marks are
/// recognised and stripped. Anything without a mark is decoded as lossy UTF-8,
/// as before. Invalid sequences become U+FFFD; a dangling odd trailing byte in
/// UTF-16 input is ignored.
fn pdf_string(o: &Object) -> Option<String> {
    let bytes: &[u8] = o.as_str().ok()?;
    let text = match bytes {
        [0xFE, 0xFF, rest @ ..] => {
            let units: Vec<u16> = rest
                .chunks_exact(2)
                .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
                .collect();
            String::from_utf16_lossy(&units)
        }
        [0xEF, 0xBB, 0xBF, rest @ ..] => String::from_utf8_lossy(rest).into_owned(),
        _ => String::from_utf8_lossy(bytes).into_owned(),
    };
    Some(text)
}
/// Lists the annotations attached to the page at position `page_index`.
///
/// `page_index` is the zero-based position of the page in the order produced by
/// `Document::get_pages`. The result is empty when the page does not exist, its
/// dictionary cannot be read, or it has no `Annots` array.
///
/// Each `Annots` element may be an inline dictionary or a reference to one;
/// elements that are neither, or that cannot be resolved, are skipped.
/// `Subtype`, `Rect` and `Contents` are looked up with reference resolution, so
/// values stored as indirect objects are reported too.
pub(crate) fn read_annotations(doc: &Document, page_index: usize) -> Vec<AnnotationInfo> {
    // Only one page id is needed, so pick it straight from the iterator.
    let Some(page_id) = doc.get_pages().into_values().nth(page_index) else {
        return Vec::new();
    };
    let Ok(page_dict) = doc.get_dictionary(page_id) else {
        return Vec::new();
    };
    // Borrow the array from the document rather than cloning every entry.
    let Ok(Object::Array(annots)) = page_dict.get_deref(b"Annots", doc) else {
        return Vec::new();
    };

    let mut out = Vec::with_capacity(annots.len());
    for entry in annots {
        let dict = match entry {
            Object::Reference(id) => match doc.get_dictionary(*id) {
                Ok(d) => d,
                Err(_) => continue,
            },
            Object::Dictionary(d) => d,
            _ => continue,
        };

        let subtype = dict
            .get_deref(b"Subtype", doc)
            .ok()
            .and_then(|o| o.as_name().ok())
            .map(|b| String::from_utf8_lossy(b).into_owned())
            .unwrap_or_else(|| "Unknown".to_string());

        // A rectangle is only reported when it has exactly four numeric entries.
        let rect = dict
            .get_deref(b"Rect", doc)
            .ok()
            .and_then(|o| o.as_array().ok())
            .and_then(|a| match a.as_slice() {
                [x0, y0, x1, y1] => Some([
                    obj_as_f64(x0)?,
                    obj_as_f64(y0)?,
                    obj_as_f64(x1)?,
                    obj_as_f64(y1)?,
                ]),
                _ => None,
            });

        let contents = dict.get_deref(b"Contents", doc).ok().and_then(pdf_string);

        out.push(AnnotationInfo {
            subtype,
            rect,
            contents,
        });
    }
    out
}
/// Summary of one file embedded in the document.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct Attachment {
/// Key under which the file is registered in the `EmbeddedFiles` name tree.
pub name: String,
/// Length in bytes of the embedded stream's content; `0` when it cannot be read.
pub size: usize,
}
/// Returns a copy of the catalog's `Names` dictionary.
///
/// Follows the trailer's `Root` reference to the catalog, then resolves the
/// catalog's `Names` entry. Yields `None` if any step is missing or has an
/// unexpected type.
fn catalog_names_dict(doc: &Document) -> Option<lopdf::Dictionary> {
    let root_entry = doc.trailer.get(b"Root").ok()?;
    let root_id = root_entry.as_reference().ok()?;
    let catalog = doc.get_dictionary(root_id).ok()?;
    if let Object::Dictionary(names) = catalog.get_deref(b"Names", doc).ok()? {
        Some(names.clone())
    } else {
        None
    }
}
/// Collects every `(name, reference)` pair reachable from a name-tree node.
///
/// `node` is a name-tree node. Entries of its `Names` array are read as
/// consecutive key/value pairs, and only pairs whose key is a string and whose
/// value is an indirect reference are appended to `out`. `Kids` references are
/// then followed recursively.
///
/// `depth` is the nesting level of `node`; nodes deeper than 64 levels are
/// ignored. Each kid reference is also visited at most once per call, so a
/// malformed file whose `Kids` entries cycle or repeat cannot trigger
/// exponential re-traversal.
fn walk_name_tree(
    doc: &Document,
    node: &lopdf::Dictionary,
    out: &mut Vec<(String, ObjectId)>,
    depth: usize,
) {
    let mut visited = std::collections::HashSet::new();
    walk_name_tree_guarded(doc, node, out, depth, &mut visited);
}

/// Recursive worker for [`walk_name_tree`]; `visited` holds kid ids already entered.
fn walk_name_tree_guarded(
    doc: &Document,
    node: &lopdf::Dictionary,
    out: &mut Vec<(String, ObjectId)>,
    depth: usize,
    visited: &mut std::collections::HashSet<ObjectId>,
) {
    if depth > 64 {
        return;
    }
    if let Ok(Object::Array(names)) = node.get_deref(b"Names", doc) {
        // A trailing unpaired element is ignored by `chunks_exact`.
        for pair in names.chunks_exact(2) {
            if let (Ok(name), Object::Reference(id)) = (pair[0].as_str(), &pair[1]) {
                out.push((String::from_utf8_lossy(name).into_owned(), *id));
            }
        }
    }
    if let Ok(Object::Array(kids)) = node.get_deref(b"Kids", doc) {
        for kid in kids {
            if let Object::Reference(id) = kid {
                // `insert` returns false when the id was already seen.
                if !visited.insert(*id) {
                    continue;
                }
                if let Ok(child) = doc.get_dictionary(*id) {
                    walk_name_tree_guarded(doc, child, out, depth + 1, visited);
                }
            }
        }
    }
}
/// Lists `(name, file-specification id)` pairs from the catalog's `EmbeddedFiles` name tree.
///
/// Returns an empty list when the catalog has no `Names` dictionary or that
/// dictionary has no `EmbeddedFiles` dictionary.
fn embedded_filespecs(doc: &Document) -> Vec<(String, ObjectId)> {
    let mut found = Vec::new();
    if let Some(names) = catalog_names_dict(doc) {
        if let Ok(Object::Dictionary(root)) = names.get_deref(b"EmbeddedFiles", doc) {
            walk_name_tree(doc, root, &mut found, 0);
        }
    }
    found
}
/// Loads the embedded file stream referenced by a file-specification dictionary.
///
/// Resolves the specification's `EF` dictionary and takes the first of its `F`
/// or `UF` entries that is an indirect reference. The stream's plain content is
/// returned together with its length; if plain content cannot be obtained, the
/// raw stream bytes are used instead.
///
/// Returns `None` when the specification, its `EF` dictionary, a usable
/// reference, or the stream itself is missing.
fn filespec_bytes(doc: &Document, filespec_id: ObjectId) -> Option<(usize, Option<Vec<u8>>)> {
    let filespec = doc.get_dictionary(filespec_id).ok()?;
    let Ok(Object::Dictionary(ef)) = filespec.get_deref(b"EF", doc) else {
        return None;
    };

    let reference_at = |key: &[u8]| match ef.get(key) {
        Ok(Object::Reference(id)) => Some(*id),
        _ => None,
    };
    let stream_id = reference_at(b"F").or_else(|| reference_at(b"UF"))?;

    let stream = doc.get_object(stream_id).ok()?.as_stream().ok()?;
    let data = match stream.get_plain_content() {
        Ok(plain) => plain,
        Err(_) => stream.content.clone(),
    };
    let len = data.len();
    Some((len, Some(data)))
}
/// Lists every embedded file in the document with its name and content size.
///
/// An attachment whose stream cannot be loaded is still listed, with size `0`.
pub(crate) fn read_attachments(doc: &Document) -> Vec<Attachment> {
    let mut attachments = Vec::new();
    for (name, id) in embedded_filespecs(doc) {
        let size = match filespec_bytes(doc, id) {
            Some((len, _)) => len,
            None => 0,
        };
        attachments.push(Attachment { name, size });
    }
    attachments
}
/// Returns the content of the first embedded file whose name equals `name`.
///
/// Yields `None` when no such attachment exists or its stream cannot be loaded.
pub(crate) fn read_attachment_bytes(doc: &Document, name: &str) -> Option<Vec<u8>> {
    let (_, id) = embedded_filespecs(doc)
        .into_iter()
        .find(|(candidate, _)| candidate.as_str() == name)?;
    let (_, bytes) = filespec_bytes(doc, id)?;
    bytes
}