use std::collections::HashSet;
use lopdf::{Document, Object, ObjectId};
/// Builds a copy of `src` whose page tree contains only the requested pages.
///
/// Each entry `n` of `page_nums` is looked up as key `n + 1` in the map returned by
/// [`Document::get_pages`], i.e. the input is treated as zero-based page indices
/// (lopdf numbers pages from 1). Entries that do not resolve to a page are ignored,
/// and duplicates collapse to a single page.
///
/// The kept pages retain their original document order; the order of `page_nums`
/// does not affect the output. Intermediate `Pages` nodes are preserved when they
/// still contain at least one kept page, and every visited node has its `Kids` and
/// `Count` entries rewritten. Objects belonging to dropped pages are not removed
/// from the clone; only the page tree is rewritten.
///
/// # Errors
///
/// Returns `crate::Error::Io` with [`std::io::ErrorKind::InvalidInput`] when none of
/// `page_nums` resolves to a page of `src`. Errors from lopdf (missing catalog,
/// missing or malformed `Pages`/`Kids` entries on the root node) are propagated.
pub fn extract_pages(src: &Document, page_nums: &[u32]) -> crate::Result<Document> {
    let all_pages = src.get_pages();
    let keep_ids: HashSet<ObjectId> = page_nums
        .iter()
        // `checked_add` avoids an overflow panic for `u32::MAX`; such an index can
        // never name a real page, so it is dropped like any other unknown number.
        .filter_map(|&n| n.checked_add(1))
        .filter_map(|page_no| all_pages.get(&page_no).copied())
        .collect();

    if keep_ids.is_empty() {
        return Err(crate::Error::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "no valid page numbers provided",
        )));
    }

    let mut new_doc = src.clone();
    let pages_id: ObjectId = new_doc.catalog()?.get(b"Pages")?.as_reference()?;

    // The visited set guards against malformed documents whose page tree has cycles.
    let mut visited = HashSet::new();
    visited.insert(pages_id);
    retain_pages(&mut new_doc, pages_id, &keep_ids, &mut visited)?;

    Ok(new_doc)
}

/// Prunes the page-tree node `node_id` in place so that only leaf pages listed in
/// `keep_ids` (and the intermediate nodes leading to them) remain, then returns the
/// number of leaf pages left beneath the node.
fn retain_pages(
    doc: &mut Document,
    node_id: ObjectId,
    keep_ids: &HashSet<ObjectId>,
    visited: &mut HashSet<ObjectId>,
) -> crate::Result<i64> {
    // Snapshot the child references first so the shared borrow of `doc` ends
    // before we recurse and mutate. Non-reference entries are dropped.
    let kid_ids: Vec<ObjectId> = doc
        .get_object(node_id)?
        .as_dict()?
        .get(b"Kids")?
        .as_array()?
        .iter()
        .filter_map(|kid| kid.as_reference().ok())
        .collect();

    let mut kept_kids: Vec<Object> = Vec::with_capacity(kid_ids.len());
    let mut leaf_count: i64 = 0;

    for kid_id in kid_ids {
        if keep_ids.contains(&kid_id) {
            kept_kids.push(Object::Reference(kid_id));
            leaf_count += 1;
            continue;
        }

        // A child that is not a requested page may still be an intermediate
        // `Pages` node; recognise it by its `Kids` array and descend into it.
        let is_tree_node = doc
            .get_object(kid_id)
            .and_then(|obj| obj.as_dict())
            .map(|dict| matches!(dict.get(b"Kids"), Ok(Object::Array(_))))
            .unwrap_or(false);

        if is_tree_node && visited.insert(kid_id) {
            let sub_count = retain_pages(doc, kid_id, keep_ids, visited)?;
            // Drop subtrees that no longer contain any requested page.
            if sub_count > 0 {
                kept_kids.push(Object::Reference(kid_id));
                leaf_count += sub_count;
            }
        }
    }

    // Propagate failures instead of silently returning an unmodified tree.
    let node = doc.get_object_mut(node_id)?.as_dict_mut()?;
    node.set("Kids", Object::Array(kept_kids));
    node.set("Count", Object::Integer(leaf_count));

    Ok(leaf_count)
}