hayro_syntax/
pdf.rs

1//! The starting point for reading PDF files.
2
3use crate::PdfData;
4use crate::crypto::DecryptionError;
5use crate::object::Object;
6use crate::page::Pages;
7use crate::page::cached::CachedPages;
8use crate::reader::Reader;
9use crate::xref::{XRef, XRefError, fallback, root_xref};
10use std::sync::Arc;
11
12/// A PDF file.
13pub struct Pdf {
14    xref: Arc<XRef>,
15    header_version: PdfVersion,
16    pages: CachedPages,
17    data: PdfData,
18}
19
20/// An error that occurred while loading a PDF file.
21#[derive(Debug, Copy, Clone)]
22pub enum LoadPdfError {
23    /// An error occurred while processing an encrypted document.
24    Decryption(DecryptionError),
25    /// The PDF was invalid or could not be parsed due to some other unknown reason.
26    Invalid,
27}
28
29#[allow(clippy::len_without_is_empty)]
30impl Pdf {
31    /// Try to read the given PDF file.
32    ///
33    /// Returns `Err` if it was unable to read it.
34    pub fn new(data: PdfData) -> Result<Self, LoadPdfError> {
35        let version = find_version(data.as_ref().as_ref()).unwrap_or(PdfVersion::Pdf10);
36        let xref = match root_xref(data.clone()) {
37            Ok(x) => x,
38            Err(e) => match e {
39                XRefError::Unknown => fallback(data.clone()).ok_or(LoadPdfError::Invalid)?,
40                XRefError::Encryption(e) => return Err(LoadPdfError::Decryption(e)),
41            },
42        };
43        let xref = Arc::new(xref);
44
45        let pages = CachedPages::new(xref.clone()).ok_or(LoadPdfError::Invalid)?;
46
47        Ok(Self {
48            xref,
49            header_version: version,
50            pages,
51            data,
52        })
53    }
54
55    /// Return the number of objects present in the PDF file.
56    pub fn len(&self) -> usize {
57        self.xref.len()
58    }
59
60    /// Return an iterator over all objects defined in the PDF file.
61    pub fn objects(&self) -> impl IntoIterator<Item = Object<'_>> {
62        self.xref.objects()
63    }
64
65    /// Return the version of the PDF file.
66    pub fn version(&self) -> PdfVersion {
67        self.xref
68            .trailer_data()
69            .version
70            .unwrap_or(self.header_version)
71    }
72
73    /// Return the underlying data of the PDF file.
74    pub fn data(&self) -> &PdfData {
75        &self.data
76    }
77
78    /// Return the pages of the PDF file.
79    pub fn pages(&self) -> &Pages<'_> {
80        self.pages.get()
81    }
82
83    /// Return the xref of the PDF file.
84    pub fn xref(&self) -> &XRef {
85        &self.xref
86    }
87}
88
89fn find_version(data: &[u8]) -> Option<PdfVersion> {
90    let data = &data[..data.len().min(2000)];
91    let mut r = Reader::new(data);
92
93    while r.forward_tag(b"%PDF-").is_none() {
94        r.read_byte()?;
95    }
96
97    PdfVersion::from_bytes(r.tail()?)
98}
99
/// The version of a PDF document.
///
/// Variants are declared from oldest to newest, so the derived ordering compares
/// versions chronologically (for example, `Pdf14 < Pdf17`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum PdfVersion {
    /// Version 1.0 of PDF.
    Pdf10,
    /// Version 1.1 of PDF.
    Pdf11,
    /// Version 1.2 of PDF.
    Pdf12,
    /// Version 1.3 of PDF.
    Pdf13,
    /// Version 1.4 of PDF.
    Pdf14,
    /// Version 1.5 of PDF.
    Pdf15,
    /// Version 1.6 of PDF.
    Pdf16,
    /// Version 1.7 of PDF.
    Pdf17,
    /// Version 2.0 of PDF.
    Pdf20,
}
122
123impl PdfVersion {
124    pub(crate) fn from_bytes(bytes: &[u8]) -> Option<PdfVersion> {
125        match bytes.get(..3)? {
126            b"1.0" => Some(PdfVersion::Pdf10),
127            b"1.1" => Some(PdfVersion::Pdf11),
128            b"1.2" => Some(PdfVersion::Pdf12),
129            b"1.3" => Some(PdfVersion::Pdf13),
130            b"1.4" => Some(PdfVersion::Pdf14),
131            b"1.5" => Some(PdfVersion::Pdf15),
132            b"1.6" => Some(PdfVersion::Pdf16),
133            b"1.7" => Some(PdfVersion::Pdf17),
134            b"2.0" => Some(PdfVersion::Pdf20),
135            _ => None,
136        }
137    }
138}
139
#[cfg(test)]
mod tests {
    use crate::pdf::{Pdf, PdfVersion};
    use std::sync::Arc;

    /// Load the PDF stored at `path` and return the version it reports.
    ///
    /// Panics if the file cannot be read or the document fails to load.
    fn reported_version(path: &str) -> PdfVersion {
        let bytes = std::fs::read(path).unwrap();
        let document = Pdf::new(Arc::new(bytes)).unwrap();

        document.version()
    }

    /// Loading an empty buffer must not panic; the outcome itself is ignored.
    #[test]
    fn issue_49() {
        let empty = Arc::new([]);
        let _ = Pdf::new(empty);
    }

    /// Fixture expected to report PDF 1.7 (per the test name, presumably taken
    /// from the file header).
    #[test]
    fn pdf_version_header() {
        let version = reported_version("../hayro-tests/downloads/pdfjs/alphatrans.pdf");

        assert_eq!(version, PdfVersion::Pdf17);
    }

    /// Fixture expected to report PDF 1.4 (per the test name, presumably taken
    /// from the document catalog).
    #[test]
    fn pdf_version_catalog() {
        let version = reported_version("../hayro-tests/downloads/pdfbox/2163.pdf");

        assert_eq!(version, PdfVersion::Pdf14);
    }
}