use crate::error::ConvertError;
use lopdf::{Document, dictionary};
/// An inclusive, 1-based range of PDF pages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageRange {
    /// First page of the range (1-based, inclusive).
    pub start: u32,
    /// Last page of the range (1-based, inclusive).
    pub end: u32,
}

impl PageRange {
    /// Builds a range covering pages `start` through `end`, inclusive.
    pub fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// Parses either a single page (`"7"`) or a dashed range (`"2-9"`).
    ///
    /// Whitespace around each number is ignored. Returns a human-readable
    /// error string when a number fails to parse, is zero, or the range is
    /// reversed.
    pub fn parse(s: &str) -> Result<Self, String> {
        match s.split_once('-') {
            // Dashed form: validate both endpoints independently so the
            // error message names the offending side.
            Some((start_str, end_str)) => {
                let start = start_str
                    .trim()
                    .parse::<u32>()
                    .map_err(|_| format!("invalid start page: {start_str}"))?;
                let end = end_str
                    .trim()
                    .parse::<u32>()
                    .map_err(|_| format!("invalid end page: {end_str}"))?;
                if start == 0 || end == 0 {
                    Err("page numbers must be >= 1".to_string())
                } else if start > end {
                    Err(format!("start ({start}) must be <= end ({end})"))
                } else {
                    Ok(Self::new(start, end))
                }
            }
            // Single-page form: a page N becomes the degenerate range N-N.
            None => {
                let n = s
                    .trim()
                    .parse::<u32>()
                    .map_err(|_| format!("invalid page number: {s}"))?;
                if n == 0 {
                    Err("page number must be >= 1".to_string())
                } else {
                    Ok(Self::new(n, n))
                }
            }
        }
    }
}
/// Parses a PDF from an in-memory buffer, mapping any lopdf failure to
/// `ConvertError::Parse`. `context` is appended to the error message (e.g.
/// " at index 2") to identify which input failed.
fn load_pdf_document(input: &[u8], context: &str) -> Result<Document, ConvertError> {
    match Document::load_mem(input) {
        Ok(document) => Ok(document),
        Err(source) => Err(ConvertError::Parse(format!("invalid PDF{context}: {source}"))),
    }
}
/// Checks that every range fits within the document, reporting the first
/// range (in slice order) that references a page past `total_pages`.
fn validate_page_ranges(ranges: &[PageRange], total_pages: u32) -> Result<(), ConvertError> {
    let out_of_bounds = ranges
        .iter()
        .find(|r| r.start > total_pages || r.end > total_pages);
    match out_of_bounds {
        Some(r) => Err(ConvertError::Parse(format!(
            "page range {}-{} exceeds document page count ({total_pages})",
            r.start, r.end
        ))),
        None => Ok(()),
    }
}
/// Compresses the document's streams and serializes it to bytes. A write
/// failure becomes `ConvertError::Render`, tagged with `context` (e.g.
/// "merged" or "split") to identify the operation.
fn save_pdf_to_bytes(doc: &mut Document, context: &str) -> Result<Vec<u8>, ConvertError> {
    doc.compress();
    let mut bytes: Vec<u8> = Vec::new();
    if let Err(e) = doc.save_to(&mut bytes) {
        return Err(ConvertError::Render(format!(
            "failed to write {context} PDF: {e}"
        )));
    }
    Ok(bytes)
}
/// Returns the number of pages in the given PDF bytes, or
/// `ConvertError::Parse` if the buffer is not a valid PDF.
pub fn page_count(input: &[u8]) -> Result<u32, ConvertError> {
    let document: Document = load_pdf_document(input, "")?;
    let pages = document.get_pages();
    Ok(pages.len() as u32)
}
/// Merges multiple PDF byte buffers into one document, keeping pages in
/// input order (all pages of `inputs[0]` first, then `inputs[1]`, ...).
///
/// # Errors
/// * `ConvertError::Parse` — empty input list, or an input that is not a
///   valid PDF (the message names the failing index).
/// * `ConvertError::Render` — the merged document cannot be serialized.
pub fn merge(inputs: &[&[u8]]) -> Result<Vec<u8>, ConvertError> {
    if inputs.is_empty() {
        return Err(ConvertError::Parse("no input PDFs to merge".to_string()));
    }
    // A single input needs no merging; return the original bytes unchanged.
    if inputs.len() == 1 {
        return Ok(inputs[0].to_vec());
    }
    let documents: Vec<Document> = inputs
        .iter()
        .enumerate()
        .map(|(i, data)| load_pdf_document(data, &format!(" at index {i}")))
        .collect::<Result<_, _>>()?;
    // Shift each document's object ids into one shared id space so all
    // object tables can be unioned without collisions.
    let mut max_id = 1;
    let mut all_pages = Vec::new();
    let mut all_objects = std::collections::BTreeMap::new();
    for mut doc in documents {
        doc.renumber_objects_with(max_id);
        max_id = doc.max_id + 1;
        // get_pages() is keyed by 1-based page number; sort by that key so
        // pages are appended in their original display order.
        let pages = doc.get_pages();
        let mut page_ids: Vec<_> = pages.into_iter().collect();
        page_ids.sort_by_key(|(num, _)| *num);
        for (_, page_id) in &page_ids {
            all_pages.push(*page_id);
        }
        // Copy every object except each source's Catalog — the merged
        // document gets exactly one freshly built catalog below.
        for (id, object) in doc.objects {
            if let Ok(dict) = object.as_dict()
                && dict
                    .get(b"Type")
                    .ok()
                    .and_then(|t| t.as_name().ok())
                    .is_some_and(|name| name == b"Catalog")
            {
                continue;
            }
            all_objects.insert(id, object);
        }
    }
    let mut merged = Document::with_version("1.7");
    for (id, object) in &all_objects {
        merged.objects.insert(*id, object.clone());
    }
    merged.max_id = max_id;
    // Build a single flat Pages node holding every collected page.
    // NOTE(review): attributes pages may inherit from their old parent
    // Pages nodes (e.g. Resources/MediaBox defined only on the parent) are
    // not copied down here — confirm inputs carry these entries per page.
    let pages_id = merged.new_object_id();
    let page_refs: Vec<lopdf::Object> = all_pages
        .iter()
        .map(|id| lopdf::Object::Reference(*id))
        .collect();
    let pages_dict = dictionary! {
        "Type" => "Pages",
        "Count" => all_pages.len() as i64,
        "Kids" => page_refs,
    };
    merged
        .objects
        .insert(pages_id, lopdf::Object::Dictionary(pages_dict));
    // Re-parent every page onto the new Pages node.
    for page_id in &all_pages {
        if let Some(object) = merged.objects.get_mut(page_id)
            && let Ok(page_dict) = object.as_dict_mut()
        {
            page_dict.set("Parent", lopdf::Object::Reference(pages_id));
        }
    }
    // Fresh catalog pointing at the new page tree; the trailer's Root entry
    // is how readers locate it.
    let catalog_id = merged.new_object_id();
    let catalog_dict = dictionary! {
        "Type" => "Catalog",
        "Pages" => pages_id,
    };
    merged
        .objects
        .insert(catalog_id, lopdf::Object::Dictionary(catalog_dict));
    merged
        .trailer
        .set("Root", lopdf::Object::Reference(catalog_id));
    // Prune the sources' now-orphaned intermediate Pages dictionaries so
    // the output contains exactly one page tree.
    let page_set: std::collections::HashSet<_> = all_pages.iter().collect();
    let mut to_remove = Vec::new();
    for (id, object) in &merged.objects {
        if page_set.contains(id) || *id == pages_id || *id == catalog_id {
            continue;
        }
        if let Ok(dict) = object.as_dict()
            && dict
                .get(b"Type")
                .ok()
                .and_then(|t| t.as_name().ok())
                .is_some_and(|name| name == b"Pages")
        {
            to_remove.push(*id);
        }
    }
    for id in to_remove {
        merged.objects.remove(&id);
    }
    save_pdf_to_bytes(&mut merged, "merged")
}
/// Extracts one output PDF per requested page range.
///
/// Each entry in the returned vector corresponds positionally to an entry
/// in `ranges` and contains only the pages of that (inclusive, 1-based)
/// range.
///
/// # Errors
/// * `ConvertError::Parse` — empty `ranges`, unparsable input, or a range
///   that exceeds the document's page count.
/// * `ConvertError::Render` — a split part cannot be serialized.
pub fn split(input: &[u8], ranges: &[PageRange]) -> Result<Vec<Vec<u8>>, ConvertError> {
    if ranges.is_empty() {
        return Err(ConvertError::Parse(
            "no page ranges specified for split".to_string(),
        ));
    }
    let source: Document = load_pdf_document(input, "")?;
    let total_pages = source.get_pages().len() as u32;
    validate_page_ranges(ranges, total_pages)?;
    // Clone the parsed document per range and delete everything outside the
    // range; collect short-circuits on the first save failure.
    ranges
        .iter()
        .map(|range| {
            let mut part = source.clone();
            let unwanted: Vec<u32> = (1..=total_pages)
                .filter(|page| !(range.start..=range.end).contains(page))
                .collect();
            if !unwanted.is_empty() {
                part.delete_pages(&unwanted);
            }
            save_pdf_to_bytes(&mut part, "split")
        })
        .collect()
}
#[cfg(test)]
#[path = "pdf_ops_tests.rs"]
mod tests;