1use crate::PdfData;
4use crate::crypto::DecryptionError;
5use crate::object::Object;
6use crate::page::Pages;
7use crate::page::cached::CachedPages;
8use crate::reader::Reader;
9use crate::xref::{XRef, XRefError, fallback, root_xref};
10use std::sync::Arc;
11
12pub struct Pdf {
14 xref: Arc<XRef>,
15 header_version: PdfVersion,
16 pages: CachedPages,
17 data: PdfData,
18}
19
20#[derive(Debug, Copy, Clone)]
22pub enum LoadPdfError {
23 Decryption(DecryptionError),
25 Invalid,
27}
28
29#[allow(clippy::len_without_is_empty)]
30impl Pdf {
31 pub fn new(data: PdfData) -> Result<Self, LoadPdfError> {
35 let version = find_version(data.as_ref().as_ref()).unwrap_or(PdfVersion::Pdf10);
36 let xref = match root_xref(data.clone()) {
37 Ok(x) => x,
38 Err(e) => match e {
39 XRefError::Unknown => fallback(data.clone()).ok_or(LoadPdfError::Invalid)?,
40 XRefError::Encryption(e) => return Err(LoadPdfError::Decryption(e)),
41 },
42 };
43 let xref = Arc::new(xref);
44
45 let pages = CachedPages::new(xref.clone()).ok_or(LoadPdfError::Invalid)?;
46
47 Ok(Self {
48 xref,
49 header_version: version,
50 pages,
51 data,
52 })
53 }
54
55 pub fn len(&self) -> usize {
57 self.xref.len()
58 }
59
60 pub fn objects(&self) -> impl IntoIterator<Item = Object<'_>> {
62 self.xref.objects()
63 }
64
65 pub fn version(&self) -> PdfVersion {
67 self.xref
68 .trailer_data()
69 .version
70 .unwrap_or(self.header_version)
71 }
72
73 pub fn data(&self) -> &PdfData {
75 &self.data
76 }
77
78 pub fn pages(&self) -> &Pages<'_> {
80 self.pages.get()
81 }
82
83 pub fn xref(&self) -> &XRef {
85 &self.xref
86 }
87}
88
89fn find_version(data: &[u8]) -> Option<PdfVersion> {
90 let data = &data[..data.len().min(2000)];
91 let mut r = Reader::new(data);
92
93 while r.forward_tag(b"%PDF-").is_none() {
94 r.read_byte()?;
95 }
96
97 PdfVersion::from_bytes(r.tail()?)
98}
99
/// The version of a PDF document.
///
/// Variants are declared from oldest to newest, so the derived ordering
/// compares older versions as less than newer ones.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PdfVersion {
    /// PDF 1.0.
    Pdf10,
    /// PDF 1.1.
    Pdf11,
    /// PDF 1.2.
    Pdf12,
    /// PDF 1.3.
    Pdf13,
    /// PDF 1.4.
    Pdf14,
    /// PDF 1.5.
    Pdf15,
    /// PDF 1.6.
    Pdf16,
    /// PDF 1.7.
    Pdf17,
    /// PDF 2.0.
    Pdf20,
}

impl PdfVersion {
    /// Parses a version from the first three bytes of `bytes`, which must
    /// have the form `<major>.<minor>` (for example `1.4`).
    ///
    /// Anything after the third byte is ignored. Returns `None` if fewer than
    /// three bytes are supplied or the prefix is not a known version.
    pub(crate) fn from_bytes(bytes: &[u8]) -> Option<PdfVersion> {
        // Pull out the two digits around the mandatory dot; inputs that are
        // too short or lack the dot are rejected right here.
        let (major, minor) = match bytes {
            [major, b'.', minor, ..] => (*major, *minor),
            _ => return None,
        };

        let version = match (major, minor) {
            (b'1', b'0') => Self::Pdf10,
            (b'1', b'1') => Self::Pdf11,
            (b'1', b'2') => Self::Pdf12,
            (b'1', b'3') => Self::Pdf13,
            (b'1', b'4') => Self::Pdf14,
            (b'1', b'5') => Self::Pdf15,
            (b'1', b'6') => Self::Pdf16,
            (b'1', b'7') => Self::Pdf17,
            (b'2', b'0') => Self::Pdf20,
            _ => return None,
        };
        Some(version)
    }
}
139
#[cfg(test)]
mod tests {
    use crate::pdf::{Pdf, PdfVersion};
    use std::sync::Arc;

    /// Reads the file at `path` and loads it as a PDF, panicking if either
    /// step fails.
    fn load(path: &str) -> Pdf {
        let bytes = std::fs::read(path).unwrap();
        Pdf::new(Arc::new(bytes)).unwrap()
    }

    /// Loading empty input must return without panicking; the result itself
    /// is deliberately ignored. (Presumably a regression test for issue #49.)
    #[test]
    fn issue_49() {
        let empty = Arc::new([]);
        let _ = Pdf::new(empty);
    }

    /// The reported version for this fixture is expected to be 1.7.
    #[test]
    fn pdf_version_header() {
        let pdf = load("../hayro-tests/downloads/pdfjs/alphatrans.pdf");

        assert_eq!(pdf.version(), PdfVersion::Pdf17);
    }

    /// The reported version for this fixture is expected to be 1.4.
    #[test]
    fn pdf_version_catalog() {
        let pdf = load("../hayro-tests/downloads/pdfbox/2163.pdf");

        assert_eq!(pdf.version(), PdfVersion::Pdf14);
    }
}