use crate::PdfData;
use crate::object::Object;
use crate::page::Pages;
use crate::page::cached::CachedPages;
use crate::reader::Reader;
use crate::sync::Arc;
use crate::xref::{XRef, XRefError, fallback, root_xref};
pub use crate::crypto::DecryptionError;
use crate::metadata::Metadata;
/// A loaded PDF file.
///
/// Instances are created through [`Pdf::new`] or [`Pdf::new_with_password`].
pub struct Pdf {
/// Cross-reference table of the file; a clone of this handle is also given to
/// the page cache when the document is loaded.
xref: Arc<XRef>,
/// Version parsed from the `%PDF-` header, or `PdfVersion::Pdf10` when no
/// header version could be found.
header_version: PdfVersion,
/// Cached pages, built from the cross-reference table at load time.
pages: CachedPages,
/// The underlying data the document was loaded from.
data: PdfData,
}
/// An error that occurred while loading a PDF file.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum LoadPdfError {
/// Loading the cross-reference table reported an encryption-related error;
/// the payload carries the underlying [`DecryptionError`].
Decryption(DecryptionError),
/// The data could not be loaded as a PDF: neither the regular
/// cross-reference lookup nor the fallback produced a table, or the pages
/// could not be built from it.
Invalid,
}
// `len` is deliberately exposed without an `is_empty` companion, so the lint
// that would ask for one is silenced for this impl.
#[allow(clippy::len_without_is_empty)]
impl Pdf {
    /// Load a PDF from `data`, using an empty password.
    ///
    /// # Errors
    ///
    /// See [`Pdf::new_with_password`].
    pub fn new(data: impl Into<PdfData>) -> Result<Self, LoadPdfError> {
        Self::new_with_password(data, "")
    }

    /// Load a PDF from `data`, passing `password` on to the cross-reference
    /// loader.
    ///
    /// The regular cross-reference lookup is tried first; if it fails with
    /// `XRefError::Unknown`, the fallback loader is tried on the same data.
    ///
    /// # Errors
    ///
    /// * [`LoadPdfError::Decryption`] if the cross-reference lookup reports an
    ///   encryption error.
    /// * [`LoadPdfError::Invalid`] if the fallback loader yields nothing, or
    ///   if the pages cannot be built from the cross-reference table.
    pub fn new_with_password(
        data: impl Into<PdfData>,
        password: &str,
    ) -> Result<Self, LoadPdfError> {
        let data = data.into();
        let password_bytes = password.as_bytes();

        // A missing or unrecognised header version is treated as PDF 1.0.
        let header_version = match find_version(data.as_ref()) {
            Some(found) => found,
            None => PdfVersion::Pdf10,
        };

        let table = match root_xref(data.clone(), password_bytes) {
            Ok(table) => table,
            Err(XRefError::Encryption(cause)) => return Err(LoadPdfError::Decryption(cause)),
            Err(XRefError::Unknown) => match fallback(data.clone(), password_bytes) {
                Some(recovered) => recovered,
                None => return Err(LoadPdfError::Invalid),
            },
        };
        let xref = Arc::new(table);

        let pages = match CachedPages::new(xref.clone()) {
            Some(pages) => pages,
            None => return Err(LoadPdfError::Invalid),
        };

        Ok(Self {
            xref,
            header_version,
            pages,
            data,
        })
    }

    /// Return the length reported by the cross-reference table.
    pub fn len(&self) -> usize {
        let table: &XRef = &self.xref;
        table.len()
    }

    /// Return an iterable over the objects of the cross-reference table.
    pub fn objects(&self) -> impl IntoIterator<Item = Object<'_>> {
        let table: &XRef = &self.xref;
        table.objects()
    }

    /// Return the version of the PDF file.
    ///
    /// A version recorded in the cross-reference table's trailer data takes
    /// precedence; otherwise the version from the file header is returned.
    pub fn version(&self) -> PdfVersion {
        match self.xref.trailer_data().version {
            Some(declared) => declared,
            None => self.header_version,
        }
    }

    /// Return the underlying data of the PDF file.
    pub fn data(&self) -> &PdfData {
        let Self { data, .. } = self;
        data
    }

    /// Return the pages of the PDF file.
    pub fn pages(&self) -> &Pages<'_> {
        let cache = &self.pages;
        cache.get()
    }

    /// Return the cross-reference table of the PDF file.
    pub fn xref(&self) -> &XRef {
        let table: &XRef = &self.xref;
        table
    }

    /// Return the metadata exposed by the cross-reference table.
    pub fn metadata(&self) -> &Metadata {
        let table: &XRef = &self.xref;
        table.metadata()
    }
}
/// Look for a `%PDF-` marker near the start of `data` and parse the version
/// that follows it.
///
/// Only the first 2000 bytes of `data` are searched. Returns `None` if the
/// reader runs out of bytes before the marker is found, or if the bytes after
/// the marker are not a version recognised by `PdfVersion::from_bytes`.
fn find_version(data: &[u8]) -> Option<PdfVersion> {
    /// Upper bound on how many leading bytes are searched for the header.
    const HEADER_SEARCH_LIMIT: usize = 2000;

    let window_len = data.len().min(HEADER_SEARCH_LIMIT);
    let mut reader = Reader::new(&data[..window_len]);

    loop {
        if reader.forward_tag(b"%PDF-").is_some() {
            break;
        }
        // No marker at this position: step one byte ahead, giving up once
        // the reader has no more bytes to hand out.
        reader.read_byte()?;
    }

    let after_marker = reader.tail()?;
    PdfVersion::from_bytes(after_marker)
}
/// The version of a PDF document.
///
/// Variants are declared from oldest to newest, so the derived ordering
/// compares versions chronologically.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PdfVersion {
    /// PDF 1.0.
    Pdf10,
    /// PDF 1.1.
    Pdf11,
    /// PDF 1.2.
    Pdf12,
    /// PDF 1.3.
    Pdf13,
    /// PDF 1.4.
    Pdf14,
    /// PDF 1.5.
    Pdf15,
    /// PDF 1.6.
    Pdf16,
    /// PDF 1.7.
    Pdf17,
    /// PDF 2.0.
    Pdf20,
}

impl PdfVersion {
    /// Parse a version from the first three bytes of `bytes` (for example
    /// `1.7`); anything after those three bytes is ignored.
    ///
    /// Returns `None` if fewer than three bytes are available or if they do
    /// not spell one of the known versions.
    pub(crate) fn from_bytes(bytes: &[u8]) -> Option<Self> {
        let version = match bytes {
            [b'1', b'.', b'0', ..] => Self::Pdf10,
            [b'1', b'.', b'1', ..] => Self::Pdf11,
            [b'1', b'.', b'2', ..] => Self::Pdf12,
            [b'1', b'.', b'3', ..] => Self::Pdf13,
            [b'1', b'.', b'4', ..] => Self::Pdf14,
            [b'1', b'.', b'5', ..] => Self::Pdf15,
            [b'1', b'.', b'6', ..] => Self::Pdf16,
            [b'1', b'.', b'7', ..] => Self::Pdf17,
            [b'2', b'.', b'0', ..] => Self::Pdf20,
            _ => return None,
        };
        Some(version)
    }
}
#[cfg(test)]
mod tests {
    use crate::pdf::{Pdf, PdfVersion};

    /// Read the file at `path` and load it as a PDF, panicking if either
    /// step fails.
    fn load(path: &str) -> Pdf {
        let bytes = std::fs::read(path).unwrap();
        Pdf::new(bytes).unwrap()
    }

    /// Loading an empty buffer must not panic; the result itself is ignored.
    #[test]
    fn issue_49() {
        let empty: Vec<u8> = Vec::new();
        let _ = Pdf::new(empty);
    }

    /// The reported version of this fixture is expected to be PDF 1.7.
    #[test]
    fn pdf_version_header() {
        let pdf = load("../hayro-tests/downloads/pdfjs/alphatrans.pdf");
        assert_eq!(pdf.version(), PdfVersion::Pdf17);
    }

    /// The reported version of this fixture is expected to be PDF 1.4.
    #[test]
    fn pdf_version_catalog() {
        let pdf = load("../hayro-tests/downloads/pdfbox/2163.pdf");
        assert_eq!(pdf.version(), PdfVersion::Pdf14);
    }
}