use std::panic::AssertUnwindSafe;
use std::path::Path;
use crate::error::PdfGrepError;
/// A loaded PDF document paired with the list of its page numbers.
///
/// Instances are created with [`PdfDocument::open`] or
/// [`PdfDocument::from_bytes`]; both decrypt the document first when it is
/// encrypted.
#[derive(Debug)]
pub struct PdfDocument {
    /// The underlying `lopdf` document that text is extracted from.
    doc: lopdf::Document,
    /// Page numbers collected from the document's page map, sorted ascending.
    page_numbers: Vec<u32>,
}
impl PdfDocument {
    /// Loads the PDF stored at `path`.
    ///
    /// If the document is encrypted, the empty password is tried first and
    /// then each entry of `passwords` in order, stopping at the first one
    /// that is accepted.
    ///
    /// # Errors
    ///
    /// * [`PdfGrepError::Io`] when `lopdf` reports an I/O error while loading.
    /// * [`PdfGrepError::Pdf`] for any other load failure.
    /// * [`PdfGrepError::Encrypted`] when the document is encrypted and no
    ///   candidate password is accepted.
    pub fn open(path: &Path, passwords: &[String]) -> Result<Self, PdfGrepError> {
        let doc = lopdf::Document::load(path).map_err(|e| match e {
            lopdf::Error::IO(io) => PdfGrepError::Io {
                path: path.to_path_buf(),
                source: io,
            },
            other => PdfGrepError::Pdf {
                path: path.to_path_buf(),
                message: other.to_string(),
            },
        })?;
        Self::from_loaded(doc, path, passwords)
    }

    /// Loads a PDF from an in-memory buffer.
    ///
    /// Errors are reported against the virtual path `<stdin>`. Encrypted
    /// documents are unlocked exactly as in [`PdfDocument::open`].
    ///
    /// # Errors
    ///
    /// * [`PdfGrepError::Pdf`] when `bytes` cannot be loaded as a PDF.
    /// * [`PdfGrepError::Encrypted`] when the document is encrypted and no
    ///   candidate password is accepted.
    pub fn from_bytes(bytes: &[u8], passwords: &[String]) -> Result<Self, PdfGrepError> {
        let virtual_path = Path::new("<stdin>");
        let doc = lopdf::Document::load_mem(bytes).map_err(|e| PdfGrepError::Pdf {
            path: virtual_path.to_path_buf(),
            message: e.to_string(),
        })?;
        Self::from_loaded(doc, virtual_path, passwords)
    }

    /// Finishes construction from an already-loaded document: unlocks it if
    /// it is encrypted, then collects its page numbers in ascending order.
    ///
    /// `path` is only used to label an [`PdfGrepError::Encrypted`] error.
    fn from_loaded(
        mut doc: lopdf::Document,
        path: &Path,
        passwords: &[String],
    ) -> Result<Self, PdfGrepError> {
        if doc.is_encrypted() && !Self::try_unlock(&mut doc, passwords) {
            return Err(PdfGrepError::Encrypted {
                path: path.to_path_buf(),
            });
        }

        let mut page_numbers: Vec<u32> = doc.get_pages().keys().copied().collect();
        page_numbers.sort_unstable();
        Ok(Self { doc, page_numbers })
    }

    /// Tries the empty password and then each supplied password in order,
    /// returning `true` as soon as one decrypts `doc`.
    fn try_unlock(doc: &mut lopdf::Document, passwords: &[String]) -> bool {
        // The empty password goes first so that documents which only carry
        // an owner password open without the caller supplying anything.
        std::iter::once("")
            .chain(passwords.iter().map(String::as_str))
            .any(|candidate| doc.decrypt(candidate).is_ok())
    }

    /// Returns the document's page numbers in ascending order.
    #[must_use]
    pub fn page_numbers(&self) -> &[u32] {
        &self.page_numbers
    }

    /// Extracts the text of page `page_num`.
    ///
    /// # Errors
    ///
    /// Returns a message prefixed with `page {page_num}: ` when extraction
    /// fails, or `page {page_num}: parser panic` when the extraction code
    /// panics.
    pub fn extract_page(&self, page_num: u32) -> Result<String, String> {
        let doc = &self.doc;
        // A panic inside the extractor is turned into an error so that one
        // bad page does not abort the whole run. `AssertUnwindSafe` is
        // needed because the closure borrows `doc`; the document is only
        // read here.
        let result = std::panic::catch_unwind(AssertUnwindSafe(|| doc.extract_text(&[page_num])));
        match result {
            Ok(Ok(text)) => Ok(text),
            Ok(Err(e)) => Err(format!("page {page_num}: {e}")),
            Err(_) => Err(format!("page {page_num}: parser panic")),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Opening a path that does not exist must fail at load time; either the
    /// `Io` or the `Pdf` variant is accepted.
    #[test]
    fn non_existent_path_returns_io_error() {
        let missing = Path::new("/definitely/not/a/real/pdf.pdf");
        let outcome = PdfDocument::open(missing, &[]);
        assert!(matches!(
            outcome,
            Err(PdfGrepError::Io { .. } | PdfGrepError::Pdf { .. })
        ));
    }

    /// Bytes that are not a PDF must be rejected with the `Pdf` variant.
    #[test]
    fn invalid_bytes_return_pdf_error() {
        let garbage: &[u8] = b"NOT A PDF";
        let outcome = PdfDocument::from_bytes(garbage, &[]);
        assert!(matches!(outcome, Err(PdfGrepError::Pdf { .. })));
    }
}