use super::{OperationError, OperationResult, PageRange};
use crate::parser::{PdfDocument, PdfReader};
use crate::{Document, Page};
use std::fs::File;
use std::path::{Path, PathBuf};
/// Settings that control how [`PdfMerger`] combines its inputs.
///
/// Within this file only `metadata_mode` is read by [`PdfMerger::merge`];
/// the remaining fields are stored on the merger but not consulted by the
/// merge logic shown here.
#[derive(Debug, Clone)]
pub struct MergeOptions {
    /// Optional list of page ranges.
    ///
    /// NOTE(review): `PdfMerger::merge` selects pages from
    /// [`MergeInput::pages`] and never reads this field — confirm whether it
    /// is meant to be honoured.
    pub page_ranges: Option<Vec<PageRange>>,
    /// Bookmark-preservation flag (`true` by default). Not read by the merge
    /// logic in this file.
    pub preserve_bookmarks: bool,
    /// Form-preservation flag (`false` by default). Not read by the merge
    /// logic in this file.
    pub preserve_forms: bool,
    /// Optimisation flag (`false` by default). Not read by the merge logic in
    /// this file.
    pub optimize: bool,
    /// Where the merged document's title/author/subject/keywords come from.
    pub metadata_mode: MetadataMode,
}
impl Default for MergeOptions {
fn default() -> Self {
Self {
page_ranges: None,
preserve_bookmarks: true,
preserve_forms: false,
optimize: false,
metadata_mode: MetadataMode::FromFirst,
}
}
}
/// Selects the source of the merged document's metadata
/// (title, author, subject, keywords).
///
/// The type is comparable with `==`/`!=`, so callers can test a mode directly
/// instead of pattern matching on it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MetadataMode {
    /// Copy metadata from the input at index 0.
    FromFirst,
    /// Copy metadata from the input at the given zero-based index (in the
    /// order inputs were added). If no input has that index, nothing is
    /// copied.
    FromDocument(usize),
    /// Use caller-supplied values; fields left as `None` are not set on the
    /// output document.
    Custom {
        /// Title to set on the merged document, if any.
        title: Option<String>,
        /// Author to set on the merged document, if any.
        author: Option<String>,
        /// Subject to set on the merged document, if any.
        subject: Option<String>,
        /// Keywords to set on the merged document, if any.
        keywords: Option<String>,
    },
    /// Do not copy or set any metadata.
    None,
}
/// One source file for a merge, plus an optional page selection.
#[derive(Debug)]
pub struct MergeInput {
    /// Location of the PDF file to read.
    pub path: PathBuf,
    /// Pages to take from the file. When `None`, [`PdfMerger::merge`] falls
    /// back to `PageRange::All`.
    pub pages: Option<PageRange>,
}
impl MergeInput {
pub fn new<P: Into<PathBuf>>(path: P) -> Self {
Self {
path: path.into(),
pages: None,
}
}
pub fn with_pages<P: Into<PathBuf>>(path: P, pages: PageRange) -> Self {
Self {
path: path.into(),
pages: Some(pages),
}
}
}
/// Collects [`MergeInput`]s and combines them into a single document
/// according to a set of [`MergeOptions`].
///
/// Derives `Debug` so the merger can be inspected in logs and assertions,
/// matching the other public types in this module.
#[derive(Debug)]
pub struct PdfMerger {
    /// Inputs to merge, in the order they were added.
    inputs: Vec<MergeInput>,
    /// Options applied when merging.
    options: MergeOptions,
}
impl PdfMerger {
    /// Creates a merger with no inputs that will use `options` when merging.
    pub fn new(options: MergeOptions) -> Self {
        Self {
            inputs: Vec::new(),
            options,
        }
    }

    /// Appends a single input. Inputs are processed in insertion order.
    pub fn add_input(&mut self, input: MergeInput) {
        self.inputs.push(input);
    }

    /// Appends every input yielded by `inputs`, preserving iteration order.
    pub fn add_inputs(&mut self, inputs: impl IntoIterator<Item = MergeInput>) {
        self.inputs.extend(inputs);
    }

    /// Opens every input in order and appends its selected pages to a new
    /// [`Document`], then applies metadata according to
    /// `options.metadata_mode`.
    ///
    /// Page selection comes from each input's `pages` field, falling back to
    /// `PageRange::All` when it is `None`.
    ///
    /// # Errors
    ///
    /// * [`OperationError::NoPagesToProcess`] if no inputs were added.
    /// * [`OperationError::ParseError`] if an input cannot be opened, its
    ///   page count or a page cannot be read, a page cannot be converted, or
    ///   a selected page index does not fit in a `u32`.
    /// * Whatever error `PageRange::get_indices` reports for the input's
    ///   page selection.
    pub fn merge(&mut self) -> OperationResult<Document> {
        if self.inputs.is_empty() {
            return Err(OperationError::NoPagesToProcess);
        }

        let mut output_doc = Document::new();

        // Borrow each input instead of cloning its path and page selection.
        for (input_idx, input) in self.inputs.iter().enumerate() {
            let document = PdfReader::open_document(&input.path).map_err(|e| {
                OperationError::ParseError(format!(
                    "Failed to open {}: {}",
                    input.path.display(),
                    e
                ))
            })?;

            let total_pages = document
                .page_count()
                .map_err(|e| OperationError::ParseError(e.to_string()))?
                as usize;

            let page_range = input.pages.as_ref().unwrap_or(&PageRange::All);
            let page_indices = page_range.get_indices(total_pages)?;

            for page_idx in page_indices {
                // Checked conversion: a plain `as u32` would silently wrap an
                // oversized index and fetch the wrong page.
                let page_number =
                    <u32 as std::convert::TryFrom<_>>::try_from(page_idx).map_err(|_| {
                        OperationError::ParseError(format!(
                            "Page index {} does not fit in a 32-bit page number",
                            page_idx
                        ))
                    })?;

                let parsed_page = document
                    .get_page(page_number)
                    .map_err(|e| OperationError::ParseError(e.to_string()))?;
                let page = Page::from_parsed_with_content(&parsed_page, &document)
                    .map_err(|e| OperationError::ParseError(e.to_string()))?;
                output_doc.add_page(page);
            }

            // Exhaustive on purpose: adding a `MetadataMode` variant should
            // force a decision here rather than fall into a catch-all arm.
            let is_metadata_source = match &self.options.metadata_mode {
                MetadataMode::FromFirst => input_idx == 0,
                // An index that matches no input simply never triggers a copy.
                MetadataMode::FromDocument(idx) => input_idx == *idx,
                MetadataMode::Custom { .. } | MetadataMode::None => false,
            };
            if is_metadata_source {
                self.copy_metadata(&document, &mut output_doc)?;
            }
        }

        // Custom metadata does not depend on any input, so apply it once
        // after all pages have been appended.
        if let MetadataMode::Custom {
            title,
            author,
            subject,
            keywords,
        } = &self.options.metadata_mode
        {
            if let Some(title) = title {
                output_doc.set_title(title);
            }
            if let Some(author) = author {
                output_doc.set_author(author);
            }
            if let Some(subject) = subject {
                output_doc.set_subject(subject);
            }
            if let Some(keywords) = keywords {
                output_doc.set_keywords(keywords);
            }
        }

        Ok(output_doc)
    }

    /// Runs [`PdfMerger::merge`] and saves the resulting document to
    /// `output_path`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `merge` or from saving the document.
    pub fn merge_to_file<P: AsRef<Path>>(&mut self, output_path: P) -> OperationResult<()> {
        let mut doc = self.merge()?;
        doc.save(output_path)?;
        Ok(())
    }

    /// Copies title, author, subject and keywords from `document` into `doc`,
    /// skipping any field the source does not provide.
    ///
    /// This is best-effort: if the source metadata cannot be read, nothing is
    /// copied and `Ok(())` is still returned.
    fn copy_metadata(
        &self,
        document: &PdfDocument<File>,
        doc: &mut Document,
    ) -> OperationResult<()> {
        if let Ok(metadata) = document.metadata() {
            if let Some(title) = metadata.title {
                doc.set_title(&title);
            }
            if let Some(author) = metadata.author {
                doc.set_author(&author);
            }
            if let Some(subject) = metadata.subject {
                doc.set_subject(&subject);
            }
            if let Some(keywords) = metadata.keywords {
                doc.set_keywords(&keywords);
            }
        }
        Ok(())
    }
}
/// Merges `inputs` using `options` and writes the result to `output_path`.
///
/// Convenience wrapper that builds a [`PdfMerger`], registers every input and
/// calls [`PdfMerger::merge_to_file`].
///
/// # Errors
///
/// Returns whatever error `merge_to_file` reports.
pub fn merge_pdfs<P>(
    inputs: Vec<MergeInput>,
    output_path: P,
    options: MergeOptions,
) -> OperationResult<()>
where
    P: AsRef<Path>,
{
    let mut pdf_merger = PdfMerger::new(options);
    pdf_merger.add_inputs(inputs);
    pdf_merger.merge_to_file(output_path)
}
/// Merges the files at `input_paths`, in order, into `output_path` using
/// `MergeOptions::default()`.
///
/// Each path becomes a [`MergeInput`] without a page selection.
///
/// # Errors
///
/// Returns whatever error [`merge_pdfs`] reports.
pub fn merge_pdf_files<P, Q>(input_paths: &[P], output_path: Q) -> OperationResult<()>
where
    P: AsRef<Path>,
    Q: AsRef<Path>,
{
    let mut inputs: Vec<MergeInput> = Vec::with_capacity(input_paths.len());
    for path in input_paths {
        inputs.push(MergeInput::new(path.as_ref()));
    }
    merge_pdfs(inputs, output_path, MergeOptions::default())
}
#[cfg(test)]
mod tests {
    use super::*;

    // Defaults: no page ranges, bookmarks on, forms/optimize off, FromFirst.
    #[test]
    fn test_merge_options_default() {
        let MergeOptions {
            page_ranges,
            preserve_bookmarks,
            preserve_forms,
            optimize,
            metadata_mode,
        } = MergeOptions::default();

        assert!(page_ranges.is_none());
        assert!(preserve_bookmarks);
        assert!(!preserve_forms);
        assert!(!optimize);
        assert!(matches!(metadata_mode, MetadataMode::FromFirst));
    }

    // `new` leaves the page selection empty; `with_pages` fills it in.
    #[test]
    fn test_merge_input_creation() {
        let plain = MergeInput::new("test.pdf");
        assert_eq!(plain.path, PathBuf::from("test.pdf"));
        assert!(plain.pages.is_none());

        let ranged = MergeInput::with_pages("test.pdf", PageRange::Range(0, 4));
        assert!(ranged.pages.is_some());
    }

    // Every field can be overridden, including custom metadata values.
    #[test]
    fn test_merge_options_with_custom_metadata() {
        let metadata_mode = MetadataMode::Custom {
            title: Some("Merged Document".to_string()),
            author: Some("PDF Merger".to_string()),
            subject: Some("Combined PDFs".to_string()),
            keywords: Some("merge, pdf".to_string()),
        };
        let options = MergeOptions {
            page_ranges: Some(vec![PageRange::All]),
            preserve_bookmarks: false,
            preserve_forms: true,
            optimize: true,
            metadata_mode,
        };

        assert!(options.page_ranges.is_some());
        assert!(!options.preserve_bookmarks);
        assert!(options.preserve_forms);
        assert!(options.optimize);

        match options.metadata_mode {
            MetadataMode::Custom { title, .. } => {
                assert_eq!(title, Some("Merged Document".to_string()));
            }
            _ => panic!("Expected Custom metadata mode"),
        }
    }

    // Struct-update syntax keeps the defaults for everything but the mode.
    #[test]
    fn test_merge_options_from_document() {
        let options = MergeOptions {
            metadata_mode: MetadataMode::FromDocument(2),
            ..Default::default()
        };

        match options.metadata_mode {
            MetadataMode::FromDocument(idx) => assert_eq!(idx, 2),
            _ => panic!("Expected FromDocument metadata mode"),
        }
    }

    // Each `PageRange` variant carries the values it was built with.
    #[test]
    fn test_page_range_variants() {
        assert!(matches!(PageRange::All, PageRange::All));

        match PageRange::Single(5) {
            PageRange::Single(page) => assert_eq!(page, 5),
            _ => panic!("Expected Single page range"),
        }

        match PageRange::Range(1, 10) {
            PageRange::Range(start, end) => {
                assert_eq!(start, 1);
                assert_eq!(end, 10);
            }
            _ => panic!("Expected Range"),
        }

        match PageRange::List(vec![1, 3, 5, 7]) {
            PageRange::List(pages) => assert_eq!(pages, vec![1, 3, 5, 7]),
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_merge_input_with_all_pages() {
        let input = MergeInput::with_pages("document.pdf", PageRange::All);

        assert_eq!(input.path, PathBuf::from("document.pdf"));
        assert!(input.pages.is_some());
    }

    #[test]
    fn test_merge_input_with_single_page() {
        let input = MergeInput::with_pages("document.pdf", PageRange::Single(0));

        assert_eq!(input.path, PathBuf::from("document.pdf"));
        assert!(input.pages.is_some());
    }

    #[test]
    fn test_merge_input_with_page_list() {
        let selection = PageRange::List(vec![0, 2, 4, 6]);
        let input = MergeInput::with_pages("document.pdf", selection);

        assert_eq!(input.path, PathBuf::from("document.pdf"));
        assert!(input.pages.is_some());
    }

    // The FromFirst, FromDocument and Custom modes can be built and recognised.
    #[test]
    fn test_metadata_mode_all_variants() {
        assert!(matches!(MetadataMode::FromFirst, MetadataMode::FromFirst));
        assert!(matches!(
            MetadataMode::FromDocument(3),
            MetadataMode::FromDocument(3)
        ));

        let custom = MetadataMode::Custom {
            title: Some("Title".to_string()),
            author: None,
            subject: None,
            keywords: None,
        };
        assert!(matches!(custom, MetadataMode::Custom { .. }));
    }

    // Several page ranges can be stored on the options at once.
    #[test]
    fn test_merge_options_builder() {
        let requested = vec![
            PageRange::All,
            PageRange::Range(0, 5),
            PageRange::Single(10),
        ];
        let options = MergeOptions {
            page_ranges: Some(requested),
            preserve_bookmarks: true,
            preserve_forms: true,
            optimize: true,
            metadata_mode: MetadataMode::FromFirst,
        };

        assert!(options.page_ranges.is_some());
        let range_count = options.page_ranges.map(|ranges| ranges.len());
        assert_eq!(range_count, Some(3));
    }
}
// Test-only module whose body is loaded from `merge_tests.rs` via `#[path]`;
// it is compiled only for test builds.
#[cfg(test)]
#[path = "merge_tests.rs"]
mod merge_tests;